1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 // MARKER(update_precomp.py): autogen include statement, do not remove 29 #include "precompiled_tools.hxx" 30 #include <tools/urlobj.hxx> 31 #include <tools/debug.hxx> 32 #include <tools/inetmime.hxx> 33 #include "com/sun/star/uno/Reference.hxx" 34 #include "com/sun/star/util/XStringWidth.hpp" 35 #include "osl/diagnose.h" 36 #include "osl/file.hxx" 37 #include "rtl/string.h" 38 #include "rtl/textenc.h" 39 #include "rtl/ustring.hxx" 40 #include "sal/types.h" 41 42 #ifndef INCLUDED_ALGORITHM 43 #include <algorithm> 44 #define INCLUDED_ALGORITHM 45 #endif 46 #ifndef INCLUDED_LIMITS 47 #include <limits> 48 #define INCLUDED_LIMITS 49 #endif 50 51 #include <string.h> 52 53 namespace unnamed_tools_urlobj {} using namespace unnamed_tools_urlobj; 54 // unnamed namespaces don't work well yet... 
55 56 using namespace com::sun; 57 58 //============================================================================ 59 // 60 // INetURLObject 61 // 62 //============================================================================ 63 64 /* The URI grammar (using RFC 2234 conventions). 65 66 Constructs of the form 67 {reference <rule1> using rule2} 68 stand for a rule matching the given rule1 specified in the given reference, 69 encoded to URI syntax using rule2 (as specified in this URI grammar). 70 71 72 ; RFC 1738, RFC 2396, RFC 2732, private 73 login = [user [":" password] "@"] hostport 74 user = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ";" / "=" / "_" / "~") 75 password = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ";" / "=" / "_" / "~") 76 hostport = host [":" port] 77 host = incomplete-hostname / hostname / IPv4address / IPv6reference 78 incomplete-hostname = *(domainlabel ".") domainlabel 79 hostname = *(domainlabel ".") toplabel ["."] 80 domainlabel = alphanum [*(alphanum / "-") alphanum] 81 toplabel = ALPHA [*(alphanum / "-") alphanum] 82 IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT 83 IPv6reference = "[" hexpart [":" IPv4address] "]" 84 hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq]) 85 hexseq = hex4 *(":" hex4) 86 hex4 = 1*4HEXDIG 87 port = *DIGIT 88 escaped = "%" HEXDIG HEXDIG 89 reserved = "$" / "&" / "+" / "," / "/" / ":" / ";" / "=" / "?" / "@" / "[" / "]" 90 mark = "!" / "'" / "(" / ")" / "*" / "-" / "." / "_" / "~" 91 alphanum = ALPHA / DIGIT 92 unreserved = alphanum / mark 93 uric = escaped / reserved / unreserved 94 pchar = escaped / unreserved / "$" / "&" / "+" / "," / ":" / "=" / "@" 95 96 97 ; RFC 1738, RFC 2396 98 ftp-url = "FTP://" login ["/" segment *("/" segment) [";TYPE=" ("A" / "D" / "I")]] 99 segment = *pchar 100 101 102 ; RFC 1738, RFC 2396 103 http-url = "HTTP://" hostport ["/" segment *("/" segment) ["?" 
*uric]] 104 segment = *(pchar / ";") 105 106 107 ; RFC 1738, RFC 2396, <http://support.microsoft.com/default.aspx?scid=KB;EN-US;Q188997&> 108 file-url = "FILE://" [host / "LOCALHOST" / netbios-name] ["/" segment *("/" segment)] 109 segment = *pchar 110 netbios-name = 1*{<alphanum / "!" / "#" / "$" / "%" / "&" / "'" / "(" / ")" / "-" / "." / "@" / "^" / "_" / "{" / "}" / "~"> using (escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "-" / "." / "@" / "_" / "~")} 111 112 113 ; RFC 2368, RFC 2396 114 mailto-url = "MAILTO:" [to] [headers] 115 to = {RFC 822 <#mailbox> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")} 116 headers = "?" header *("&" header) 117 header = hname "=" hvalue 118 hname = {RFC 822 <field-name> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")} / "BODY" 119 hvalue = {RFC 822 <field-body> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")} 120 121 122 ; private (see RFC 1738, RFC 2396) 123 vnd-sun-star-webdav-url = "VND.SUN.STAR.WEBDAV://" hostport ["/" segment *("/" segment) ["?" *uric]] 124 segment = *(pchar / ";") 125 126 127 ; RFC 1738, RFC 2396, RFC 2732 128 news-url = "NEWS:" grouppart 129 grouppart = "*" / group / article 130 group = alpha *(alphanum / "+" / "-" / "." / "_") 131 article = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "?" / "_" / "~") "@" host 132 133 134 ; private 135 private-url = "PRIVATE:" path ["?" *uric] 136 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~") 137 138 139 ; private 140 vnd-sun-star-help-url = "VND.SUN.STAR.HELP://" name *("/" segment) ["?" *uric] 141 name = *(escaped / alphanum / "!" 
/ "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~") 142 segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~") 143 144 145 ; private 146 https-url = "HTTPS://" hostport ["/" segment *("/" segment) ["?" *uric]] 147 segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~") 148 149 150 ; private 151 slot-url = "SLOT:" path ["?" *uric] 152 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~") 153 154 155 ; private 156 macro-url = "MACRO:" path ["?" *uric] 157 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~") 158 159 160 ; private 161 javascript-url = "JAVASCRIPT:" *uric 162 163 164 ; private (see RFC 2192) 165 imap-url = "IMAP://" user [";AUTH=" auth] "@" hostport "/" segment *("/" segment) ["/;UID=" nz_number] 166 user = 1*{RFC 2060 <CHAR8> using (escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "=" / "_" / "~")} 167 auth = {RFC 2060 <atom> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "+" / "," / "-" / "." / "=" / "_" / "~")} 168 segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / "=" / "@" / "_" / "~") 169 nz_number = {RFC 2060 <nz_number> using *DIGIT} 170 171 172 ; private 173 pop3-url = "POP3://" login ["/" ["<" *uric ">"]] 174 175 176 ; RFC 2397 177 data-url = "DATA:" [mediatype] [";BASE64"] "," *uric 178 mediatype = [type "/" subtype] *(";" attribute "=" value) 179 type = {RFC 2045 <type> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")} 180 subtype = {RFC 2045 <subtype> using *(escaped / alphanum / "!" 
/ "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
   attribute = {RFC 2045 <attribute> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
   value = {RFC 2045 <value> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}


   ; RFC 2392, RFC 2396
   cid-url = "CID:" {RFC 822 <addr-spec> using *uric}


   ; private
   out-url = "OUT:///~" name ["/" *uric]
   name = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "?" / "@" / "_" / "~")


   ; private
   vnd-sun-star-hier-url = "VND.SUN.STAR.HIER:" ["//" reg_name] *("/" *pchar)
   reg_name = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")

   ; private
   vim-url = "VIM://" +vimc [":" *vimc] ["/" [("INBOX" message) / ("NEWSGROUPS" ["/" [+vimc message]])]]
   message = ["/" [+vimc [":" +DIGIT "." +DIGIT "." +DIGIT]]]
   vimc = ("=" HEXDIG HEXDIG) / alphanum


   ; private
   uno-url = ".UNO:" path ["?" *uric]
   path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")


   ; private
   component-url = ".COMPONENT:" path ["?" *uric]
   path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")


   ; private
   vnd-sun-star-pkg-url = "VND.SUN.STAR.PKG://" reg_name *("/" *pchar) ["?" *uric]
   reg_name = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")


   ; RFC 2255
   ldap-url = "LDAP://" [hostport] ["/" [dn ["?" [attrdesc *("," attrdesc)] ["?" ["base" / "one" / "sub"] ["?" [filter] ["?"
extension *("," extension)]]]]]] 221 dn = {RFC 2253 <distinguishedName> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")} 222 attrdesc = {RFC 2251 <AttributeDescription> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")} 223 filter = {RFC 2254 <filter> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")} 224 extension = ["!"] ["X-"] extoken ["=" exvalue] 225 extoken = {RFC 2252 <oid> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")} 226 exvalue = {RFC 2251 <LDAPString> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")} 227 228 229 ; private 230 db-url = "DB:" *uric 231 232 233 ; private 234 vnd-sun-star-cmd-url = "VND.SUN.STAR.CMD:" opaque_part 235 opaque_part = uric_no_slash *uric 236 uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / "," 237 238 239 ; private 240 vnd-sun-star-url = "VND.SUN.STAR.ODMA:" ["/" *uric_no_slash] 241 uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / "," 242 243 244 ; RFC 1738 245 telnet-url = "TELNET://" login ["/"] 246 247 248 ; private 249 vnd-sun-star-expand-url = "VND.SUN.STAR.EXPAND:" opaque_part 250 opaque_part = uric_no_slash *uric 251 uric_no_slash = unreserved / escaped / ";" / "?" 
/ ":" / "@" / "&" / "=" / "+" / "$" / "," 252 253 254 ; private 255 vnd-sun-star-tdoc-url = "VND.SUN.STAR.TDOC:/" segment *("/" segment) 256 segment = *pchar 257 258 259 ; private 260 unknown-url = scheme ":" 1*uric 261 scheme = ALPHA *(alphanum / "+" / "-" / ".") 262 263 264 ; private (http://ubiqx.org/cifs/Appendix-D.html): 265 smb-url = "SMB://" login ["/" segment *("/" segment) ["?" *uric]] 266 segment = *(pchar / ";") 267 */ 268 269 //============================================================================ 270 inline sal_Int32 INetURLObject::SubString::clear() 271 { 272 sal_Int32 nDelta = -m_nLength; 273 m_nBegin = -1; 274 m_nLength = 0; 275 return nDelta; 276 } 277 278 inline sal_Int32 INetURLObject::SubString::set(rtl::OUStringBuffer & rString, 279 rtl::OUString const & rSubString) 280 { 281 rtl::OUString sTemp(rString.makeStringAndClear()); 282 sal_Int32 nDelta = set(sTemp, rSubString); 283 rString.append(sTemp); 284 return nDelta; 285 } 286 287 inline sal_Int32 INetURLObject::SubString::set(rtl::OUString & rString, 288 rtl::OUString const & rSubString) 289 { 290 sal_Int32 nDelta = rSubString.getLength() - m_nLength; 291 292 rString = rString.replaceAt(m_nBegin, m_nLength, rSubString); 293 294 m_nLength = rSubString.getLength(); 295 return nDelta; 296 } 297 298 inline sal_Int32 INetURLObject::SubString::set(rtl::OUStringBuffer & rString, 299 rtl::OUString const & rSubString, 300 sal_Int32 nTheBegin) 301 { 302 m_nBegin = nTheBegin; 303 return set(rString, rSubString); 304 } 305 306 //============================================================================ 307 inline void INetURLObject::SubString::operator +=(sal_Int32 nDelta) 308 { 309 if (isPresent()) 310 m_nBegin = m_nBegin + nDelta; 311 } 312 313 //============================================================================ 314 int INetURLObject::SubString::compare(SubString const & rOther, 315 rtl::OUStringBuffer const & rThisString, 316 rtl::OUStringBuffer const & rOtherString) const 317 { 318 
sal_Int32 len = std::min(m_nLength, rOther.m_nLength); 319 sal_Unicode const * p1 = rThisString.getStr() + m_nBegin; 320 sal_Unicode const * end = p1 + len; 321 sal_Unicode const * p2 = rOtherString.getStr() + rOther.m_nBegin; 322 while (p1 != end) { 323 if (*p1 < *p2) { 324 return -1; 325 } else if (*p1 > *p2) { 326 return 1; 327 } 328 ++p1; 329 ++p2; 330 } 331 return m_nLength < rOther.m_nLength ? -1 332 : m_nLength > rOther.m_nLength ? 1 333 : 0; 334 } 335 336 //============================================================================ 337 struct INetURLObject::SchemeInfo 338 { 339 sal_Char const * m_pScheme; 340 sal_Char const * m_pPrefix; 341 sal_uInt16 m_nDefaultPort; 342 bool m_bAuthority; 343 bool m_bUser; 344 bool m_bAuth; 345 bool m_bPassword; 346 bool m_bHost; 347 bool m_bPort; 348 bool m_bHierarchical; 349 bool m_bQuery; 350 }; 351 352 //============================================================================ 353 struct INetURLObject::PrefixInfo 354 { 355 enum Kind { OFFICIAL, INTERNAL, EXTERNAL, ALIAS }; // order is important! 
356 357 sal_Char const * m_pPrefix; 358 sal_Char const * m_pTranslatedPrefix; 359 INetProtocol m_eScheme; 360 Kind m_eKind; 361 }; 362 363 //============================================================================ 364 static INetURLObject::SchemeInfo const aSchemeInfoMap[INET_PROT_END] 365 = { { "", "", 0, false, false, false, false, false, false, false, 366 false }, 367 { "ftp", "ftp://", 21, true, true, false, true, true, true, true, 368 false }, 369 { "http", "http://", 80, true, false, false, false, true, true, 370 true, true }, 371 { "file", "file://", 0, true, false, false, false, true, false, 372 true, false }, 373 { "mailto", "mailto:", 0, false, false, false, false, false, 374 false, false, true }, 375 { "vnd.sun.star.webdav", "vnd.sun.star.webdav://", 80, true, false, 376 false, false, true, true, true, true }, 377 { "news", "news:", 0, false, false, false, false, false, false, false, 378 false }, 379 { "private", "private:", 0, false, false, false, false, false, 380 false, false, true }, 381 { "vnd.sun.star.help", "vnd.sun.star.help://", 0, true, false, false, 382 false, false, false, true, true }, 383 { "https", "https://", 443, true, false, false, false, true, true, 384 true, true }, 385 { "slot", "slot:", 0, false, false, false, false, false, false, 386 false, true }, 387 { "macro", "macro:", 0, false, false, false, false, false, false, 388 false, true }, 389 { "javascript", "javascript:", 0, false, false, false, false, 390 false, false, false, false }, 391 { "imap", "imap://", 143, true, true, true, false, true, true, 392 true, false }, 393 { "pop3", "pop3://", 110, true, true, false, true, true, true, 394 false, false }, 395 { "data", "data:", 0, false, false, false, false, false, false, 396 false, false }, 397 { "cid", "cid:", 0, false, false, false, false, false, false, 398 false, false }, 399 { "out", "out://", 0, true, false, false, false, false, false, 400 false, false }, 401 { "vnd.sun.star.hier", "vnd.sun.star.hier:", 0, true, false, 
false, 402 false, false, false, true, false }, 403 { "vim", "vim://", 0, true, true, false, true, false, false, true, 404 false }, 405 { ".uno", ".uno:", 0, false, false, false, false, false, false, 406 false, true }, 407 { ".component", ".component:", 0, false, false, false, false, 408 false, false, false, true }, 409 { "vnd.sun.star.pkg", "vnd.sun.star.pkg://", 0, true, false, false, 410 false, false, false, true, true }, 411 { "ldap", "ldap://", 389, true, false, false, false, true, true, 412 false, true }, 413 { "db", "db:", 0, false, false, false, false, false, false, false, 414 false }, 415 { "vnd.sun.star.cmd", "vnd.sun.star.cmd:", 0, false, false, false, 416 false, false, false, false, false }, 417 { "vnd.sun.star.odma", "vnd.sun.star.odma:", 0, false, false, false, 418 false, false, false, true, false }, 419 { "telnet", "telnet://", 23, true, true, false, true, true, true, true, 420 false }, 421 { "vnd.sun.star.expand", "vnd.sun.star.expand:", 0, false, false, false, 422 false, false, false, false, false }, 423 { "vnd.sun.star.tdoc", "vnd.sun.star.tdoc:", 0, false, false, false, 424 false, false, false, true, false }, 425 { "", "", 0, false, false, false, false, true, true, true, false }, 426 { "smb", "smb://", 139, true, true, false, true, true, true, true, 427 true }, 428 { "hid", "hid:", 0, false, false, false, false, false, false, 429 false, true } }; 430 431 // static 432 inline INetURLObject::SchemeInfo const & 433 INetURLObject::getSchemeInfo(INetProtocol eTheScheme) 434 { 435 return aSchemeInfoMap[eTheScheme]; 436 }; 437 438 //============================================================================ 439 inline INetURLObject::SchemeInfo const & INetURLObject::getSchemeInfo() const 440 { 441 return getSchemeInfo(m_eScheme); 442 } 443 444 //============================================================================ 445 // static 446 inline void INetURLObject::appendEscape(rtl::OUStringBuffer & rTheText, 447 sal_Char cEscapePrefix, 448 sal_uInt32 
nOctet)
{
    // Escape character first, then two hex digits: the bits above the low
    // nibble (nOctet >> 4) and the low nibble (nOctet & 15).
    // NOTE(review): assumes nOctet fits in eight bits, so that nOctet >> 4
    // is a single hex digit -- confirm against the callers.
    rTheText.append(sal_Unicode(cEscapePrefix));
    rTheText.append(sal_Unicode(INetMIME::getHexDigit(int(nOctet >> 4))));
    rTheText.append(sal_Unicode(INetMIME::getHexDigit(int(nOctet & 15))));
}

//============================================================================
namespace unnamed_tools_urlobj {

// Two-character aliases for the INetURLObject::Part values, so that the rows
// of aMustEncodeMap below stay short.
enum
{
    PA = INetURLObject::PART_OBSOLETE_NORMAL,
    PB = INetURLObject::PART_OBSOLETE_FILE,
    PC = INetURLObject::PART_OBSOLETE_PARAM,
    PD = INetURLObject::PART_USER_PASSWORD,
    PE = INetURLObject::PART_IMAP_ACHAR,
    PF = INetURLObject::PART_VIM,
    PG = INetURLObject::PART_HOST_EXTRA,
    PH = INetURLObject::PART_FPATH,
    PI = INetURLObject::PART_AUTHORITY,
    PJ = INetURLObject::PART_PATH_SEGMENTS_EXTRA,
    PK = INetURLObject::PART_REL_SEGMENT_EXTRA,
    PL = INetURLObject::PART_URIC,
    PM = INetURLObject::PART_HTTP_PATH,
    PN = INetURLObject::PART_FILE_SEGMENT_EXTRA,
    PO = INetURLObject::PART_MESSAGE_ID,
    PP = INetURLObject::PART_MESSAGE_ID_PATH,
    PQ = INetURLObject::PART_MAILTO,
    PR = INetURLObject::PART_PATH_BEFORE_QUERY,
    PS = INetURLObject::PART_PCHAR,
    PT = INetURLObject::PART_FRAGMENT,
    PU = INetURLObject::PART_VISIBLE,
    PV = INetURLObject::PART_VISIBLE_NONSPECIAL,
    PW = INetURLObject::PART_CREATEFRAGMENT,
    PX = INetURLObject::PART_UNO_PARAM_VALUE,
    PY = INetURLObject::PART_UNAMBIGUOUS,
    PZ = INetURLObject::PART_URIC_NO_SLASH,
    P1 = INetURLObject::PART_HTTP_QUERY,
    P2 = INetURLObject::PART_NEWS_ARTICLE_LOCALPART
};

// One entry per US-ASCII code point (index 0..127).  An entry is the sum of
// the Part values in which that character may stand unencoded: mustEncode()
// reports "must encode" whenever the bit of the requested part is NOT set.
// The first 32 entries (control characters) and the last one (DEL) are 0.
// NOTE(review): summing with '+' presumes that every Part value is a
// distinct single bit -- confirm against the Part enum declaration.
static sal_uInt32 const aMustEncodeMap[128]
    = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*   */ PY,
/* ! */ PC+PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* " */ PU+PV +PY,
/* # */ PU,
/* $ */ PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* % */ PU,
/* & */ PA+PB+PC+PD+PE +PH+PI+PJ+PK+PL+PM+PN+PO+PP +PR+PS+PT+PU+PV+PW+PX +PZ+P1+P2,
/* ' */ PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* ( */ PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* ) */ PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* * */ PA+PB+PC+PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* + */ PA+PB+PC+PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX +PZ+P1+P2,
/* , */ PA+PB+PC+PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW +PZ+P1+P2,
/* - */ PA+PB+PC+PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* . */ PA+PB+PC+PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* / */ PA+PB+PC +PH +PJ +PL+PM +PP+PQ+PR +PT+PU+PV +PX +P2,
/* 0 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* 1 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* 2 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* 3 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* 4 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* 5 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* 6 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* 7 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* 8 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* 9 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* : */ PB+PC +PH+PI+PJ +PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX +PZ+P1+P2,
/* ; */ PC+PD +PI+PJ+PK+PL+PM +PO+PP+PQ+PR +PT+PU +PW +PZ+P1+P2,
/* < */ PC +PO+PP +PU+PV +PY,
/* = */ PA+PB+PC+PD+PE +PH+PI+PJ+PK+PL+PM+PN +PR+PS+PT+PU+PV+PW +PZ+P1+P2,
/* > */ PC +PO+PP +PU+PV +PY,
/* ? */ PC +PL +PT+PU +PW+PX +PZ +P2,
/* @ */ PC +PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1,
/* A */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* B */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* C */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* D */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* E */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* F */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* G */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* H */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* I */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* J */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* K */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* L */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* M */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* N */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* O */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* P */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* Q */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* R */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* S */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* T */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* U */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* V */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* W */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* X */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* Y */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* Z */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* [ */ PL +PU+PV +PX,
/* \ */ PB +PU+PV +PY,
/* ] */ PL +PU+PV +PX,
/* ^ */ PU+PV +PY,
/* _ */ PA+PB+PC+PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* ` */ PU+PV +PY,
/* a */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* b */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* c */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* d */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* e */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* f */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* g */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* h */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* i */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* j */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* k */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* l */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* m */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* n */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* o */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* p */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* q */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* r */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* s */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* t */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* u */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* v */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* w */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* x */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* y */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* z */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
/* { */ PU+PV +PY,
/* | */ PB+PC +PN +PT+PU+PV +PY,
/* } */ PU+PV +PY,
/* ~ */ PA+PB+PC+PD+PE +PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ
+P2, 588 0 }; 589 590 inline bool mustEncode(sal_uInt32 nUTF32, INetURLObject::Part ePart) 591 { 592 return !INetMIME::isUSASCII(nUTF32) || !(aMustEncodeMap[nUTF32] & ePart); 593 } 594 595 } 596 597 //============================================================================ 598 void INetURLObject::setInvalid() 599 { 600 m_aAbsURIRef.setLength(0); 601 m_eScheme = INET_PROT_NOT_VALID; 602 m_aScheme.clear(); 603 m_aUser.clear(); 604 m_aAuth.clear(); 605 m_aHost.clear(); 606 m_aPort.clear(); 607 m_aPath.clear(); 608 m_aQuery.clear(); 609 m_aFragment.clear(); 610 } 611 612 //============================================================================ 613 614 namespace unnamed_tools_urlobj { 615 616 INetURLObject::FSysStyle 617 guessFSysStyleByCounting(sal_Unicode const * pBegin, 618 sal_Unicode const * pEnd, 619 INetURLObject::FSysStyle eStyle) 620 { 621 DBG_ASSERT(eStyle 622 & (INetURLObject::FSYS_UNX 623 | INetURLObject::FSYS_DOS 624 | INetURLObject::FSYS_MAC), 625 "guessFSysStyleByCounting(): Bad style"); 626 DBG_ASSERT(std::numeric_limits< sal_Int32 >::min() < pBegin - pEnd 627 && pEnd - pBegin <= std::numeric_limits< sal_Int32 >::max(), 628 "guessFSysStyleByCounting(): Too big"); 629 sal_Int32 nSlashCount 630 = eStyle & INetURLObject::FSYS_UNX ? 631 0 : std::numeric_limits< sal_Int32 >::min(); 632 sal_Int32 nBackslashCount 633 = eStyle & INetURLObject::FSYS_DOS ? 634 0 : std::numeric_limits< sal_Int32 >::min(); 635 sal_Int32 nColonCount 636 = eStyle & INetURLObject::FSYS_MAC ? 637 0 : std::numeric_limits< sal_Int32 >::min(); 638 while (pBegin != pEnd) 639 switch (*pBegin++) 640 { 641 case '/': 642 ++nSlashCount; 643 break; 644 645 case '\\': 646 ++nBackslashCount; 647 break; 648 649 case ':': 650 ++nColonCount; 651 break; 652 } 653 return nSlashCount >= nBackslashCount ? 654 nSlashCount >= nColonCount ? 655 INetURLObject::FSYS_UNX : INetURLObject::FSYS_MAC : 656 nBackslashCount >= nColonCount ? 
657 INetURLObject::FSYS_DOS : INetURLObject::FSYS_MAC; 658 } 659 660 rtl::OUString parseScheme( 661 sal_Unicode const ** begin, sal_Unicode const * end, 662 sal_uInt32 fragmentDelimiter) 663 { 664 sal_Unicode const * p = *begin; 665 if (p != end && INetMIME::isAlpha(*p)) { 666 do { 667 ++p; 668 } while (p != end 669 && (INetMIME::isAlphanumeric(*p) || *p == '+' || *p == '-' 670 || *p == '.')); 671 // #i34835# To avoid problems with Windows file paths like "C:\foo", 672 // do not accept generic schemes that are only one character long: 673 if (end - p > 1 && p[0] == ':' && p[1] != fragmentDelimiter 674 && p - *begin >= 2) 675 { 676 rtl::OUString scheme( 677 rtl::OUString(*begin, p - *begin).toAsciiLowerCase()); 678 *begin = p + 1; 679 return scheme; 680 } 681 } 682 return rtl::OUString(); 683 } 684 685 } 686 687 bool INetURLObject::setAbsURIRef(rtl::OUString const & rTheAbsURIRef, 688 bool bOctets, 689 EncodeMechanism eMechanism, 690 rtl_TextEncoding eCharset, 691 bool bSmart, 692 FSysStyle eStyle) 693 { 694 sal_Unicode const * pPos = rTheAbsURIRef.getStr(); 695 sal_Unicode const * pEnd = pPos + rTheAbsURIRef.getLength(); 696 697 setInvalid(); 698 699 sal_uInt32 nFragmentDelimiter = '#'; 700 701 rtl::OUStringBuffer aSynAbsURIRef; 702 703 // Parse <scheme>: 704 sal_Unicode const * p = pPos; 705 PrefixInfo const * pPrefix = getPrefix(p, pEnd); 706 if (pPrefix) 707 { 708 pPos = p; 709 m_eScheme = pPrefix->m_eScheme; 710 711 rtl::OUString sTemp(rtl::OUString::createFromAscii(pPrefix->m_eKind 712 >= PrefixInfo::EXTERNAL ? 713 pPrefix->m_pTranslatedPrefix : 714 pPrefix->m_pPrefix)); 715 aSynAbsURIRef.append(sTemp); 716 m_aScheme = SubString( 0, sTemp.indexOf(static_cast< sal_Unicode >(':')) ); 717 } 718 else 719 { 720 if (bSmart) 721 { 722 // For scheme detection, the first (if any) of the following 723 // productions that matches the input string (and for which the 724 // appropriate style bit is set in eStyle, if applicable) 725 // determines the scheme. 
The productions use the auxiliary rules 726 // 727 // domain = label *("." label) 728 // label = alphanum [*(alphanum / "-") alphanum] 729 // alphanum = ALPHA / DIGIT 730 // IPv6reference = "[" IPv6address "]" 731 // IPv6address = hexpart [":" IPv4address] 732 // IPv4address = 1*3DIGIT 3("." 1*3DIGIT) 733 // hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq]) 734 // hexseq = hex4 *(":" hex4) 735 // hex4 = 1*4HEXDIG 736 // UCS4 = <any UCS4 character> 737 // 738 // 1st Production (known scheme): 739 // <one of the known schemes, ignoring case> ":" *UCS4 740 // 741 // 2nd Production (mailto): 742 // domain "@" domain 743 // 744 // 3rd Production (ftp): 745 // "FTP" 2*("." label) ["/" *UCS4] 746 // 747 // 4th Production (http): 748 // label 2*("." label) ["/" *UCS4] 749 // 750 // 5th Production (file): 751 // "//" (domain / IPv6reference) ["/" *UCS4] 752 // 753 // 6th Production (Unix file): 754 // "/" *UCS4 755 // 756 // 7th Production (UNC file; FSYS_DOS only): 757 // "\\" domain ["\" *UCS4] 758 // 759 // 8th Production (Unix-like DOS file; FSYS_DOS only): 760 // ALPHA ":" ["/" *UCS4] 761 // 762 // 9th Production (DOS file; FSYS_DOS only): 763 // ALPHA ":" ["\" *UCS4] 764 // 765 // For the 'non URL' file productions 6--9, the interpretation of 766 // the input as a (degenerate) URI is turned off, i.e., escape 767 // sequences and fragments are never detected as such, but are 768 // taken as literal characters. 
769 770 sal_Unicode const * p1 = pPos; 771 if (eStyle & FSYS_DOS 772 && pEnd - p1 >= 2 773 && INetMIME::isAlpha(p1[0]) 774 && p1[1] == ':' 775 && (pEnd - p1 == 2 || p1[2] == '/' || p1[2] == '\\')) 776 { 777 m_eScheme = INET_PROT_FILE; // 8th, 9th 778 eMechanism = ENCODE_ALL; 779 nFragmentDelimiter = 0x80000000; 780 } 781 else if (pEnd - p1 >= 2 && p1[0] == '/' && p1[1] == '/') 782 { 783 p1 += 2; 784 if ((scanDomain(p1, pEnd) > 0 || scanIPv6reference(p1, pEnd)) 785 && (p1 == pEnd || *p1 == '/')) 786 m_eScheme = INET_PROT_FILE; // 5th 787 } 788 else if (p1 != pEnd && *p1 == '/') 789 { 790 m_eScheme = INET_PROT_FILE; // 6th 791 eMechanism = ENCODE_ALL; 792 nFragmentDelimiter = 0x80000000; 793 } 794 else if (eStyle & FSYS_DOS 795 && pEnd - p1 >= 2 796 && p1[0] == '\\' 797 && p1[1] == '\\') 798 { 799 p1 += 2; 800 sal_Int32 n = rtl_ustr_indexOfChar_WithLength( 801 p1, pEnd - p1, '\\'); 802 sal_Unicode const * pe = n == -1 ? pEnd : p1 + n; 803 if ( 804 parseHostOrNetBiosName( 805 p1, pe, bOctets, ENCODE_ALL, RTL_TEXTENCODING_DONTKNOW, 806 true, NULL) || 807 (scanDomain(p1, pe) > 0 && p1 == pe) 808 ) 809 { 810 m_eScheme = INET_PROT_FILE; // 7th 811 eMechanism = ENCODE_ALL; 812 nFragmentDelimiter = 0x80000000; 813 } 814 } 815 else 816 { 817 sal_Unicode const * pDomainEnd = p1; 818 sal_uInt32 nLabels = scanDomain(pDomainEnd, pEnd); 819 if (nLabels > 0 && pDomainEnd != pEnd && *pDomainEnd == '@') 820 { 821 ++pDomainEnd; 822 if (scanDomain(pDomainEnd, pEnd) > 0 823 && pDomainEnd == pEnd) 824 m_eScheme = INET_PROT_MAILTO; // 2nd 825 } 826 else if (nLabels >= 3 827 && (pDomainEnd == pEnd || *pDomainEnd == '/')) 828 m_eScheme 829 = pDomainEnd - p1 >= 4 830 && (p1[0] == 'f' || p1[0] == 'F') 831 && (p1[1] == 't' || p1[1] == 'T') 832 && (p1[2] == 'p' || p1[2] == 'P') 833 && p1[3] == '.' ? 
834 INET_PROT_FTP : INET_PROT_HTTP; // 3rd, 4th 835 } 836 } 837 838 rtl::OUString aSynScheme; 839 if (m_eScheme == INET_PROT_NOT_VALID) { 840 sal_Unicode const * p1 = pPos; 841 aSynScheme = parseScheme(&p1, pEnd, nFragmentDelimiter); 842 if (aSynScheme.getLength() > 0) 843 { 844 m_eScheme = INET_PROT_GENERIC; 845 pPos = p1; 846 } 847 } 848 849 if (bSmart && m_eScheme == INET_PROT_NOT_VALID && pPos != pEnd 850 && *pPos != nFragmentDelimiter) 851 { 852 m_eScheme = m_eSmartScheme; 853 } 854 855 if (m_eScheme == INET_PROT_NOT_VALID) 856 { 857 setInvalid(); 858 return false; 859 } 860 861 if (m_eScheme != INET_PROT_GENERIC) { 862 aSynScheme = rtl::OUString::createFromAscii(getSchemeInfo().m_pScheme); 863 } 864 m_aScheme.set(aSynAbsURIRef, aSynScheme, aSynAbsURIRef.getLength()); 865 aSynAbsURIRef.append(sal_Unicode(':')); 866 } 867 868 sal_Char cEscapePrefix = getEscapePrefix(); 869 sal_uInt32 nSegmentDelimiter = '/'; 870 sal_uInt32 nAltSegmentDelimiter = 0x80000000; 871 bool bSkippedInitialSlash = false; 872 873 // Parse //<user>;AUTH=<auth>@<host>:<port> or 874 // //<user>:<password>@<host>:<port> or 875 // //<reg_name> 876 if (getSchemeInfo().m_bAuthority) 877 { 878 sal_Unicode const * pUserInfoBegin = 0; 879 sal_Unicode const * pUserInfoEnd = 0; 880 sal_Unicode const * pHostPortBegin = 0; 881 sal_Unicode const * pHostPortEnd = 0; 882 883 switch (m_eScheme) 884 { 885 case INET_PROT_VND_SUN_STAR_HELP: 886 { 887 if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '/') 888 { 889 setInvalid(); 890 return false; 891 } 892 aSynAbsURIRef.appendAscii(RTL_CONSTASCII_STRINGPARAM("//")); 893 rtl::OUStringBuffer aSynAuthority; 894 while (pPos < pEnd 895 && *pPos != '/' && *pPos != '?' 
896 && *pPos != nFragmentDelimiter) 897 { 898 EscapeType eEscapeType; 899 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, 900 cEscapePrefix, eMechanism, 901 eCharset, eEscapeType); 902 appendUCS4(aSynAuthority, nUTF32, eEscapeType, bOctets, 903 PART_AUTHORITY, cEscapePrefix, eCharset, 904 false); 905 } 906 m_aHost.set(aSynAbsURIRef, 907 aSynAuthority.makeStringAndClear(), 908 aSynAbsURIRef.getLength()); 909 // misusing m_aHost to store the authority 910 break; 911 } 912 913 case INET_PROT_VND_SUN_STAR_HIER: 914 { 915 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/') 916 { 917 pPos += 2; 918 aSynAbsURIRef. 919 appendAscii(RTL_CONSTASCII_STRINGPARAM("//")); 920 rtl::OUStringBuffer aSynAuthority; 921 while (pPos < pEnd 922 && *pPos != '/' && *pPos != '?' 923 && *pPos != nFragmentDelimiter) 924 { 925 EscapeType eEscapeType; 926 sal_uInt32 nUTF32 = getUTF32(pPos, 927 pEnd, 928 bOctets, 929 cEscapePrefix, 930 eMechanism, 931 eCharset, 932 eEscapeType); 933 appendUCS4(aSynAuthority, 934 nUTF32, 935 eEscapeType, 936 bOctets, 937 PART_AUTHORITY, 938 cEscapePrefix, 939 eCharset, 940 false); 941 } 942 if (aSynAuthority.getLength() == 0) 943 { 944 setInvalid(); 945 return false; 946 } 947 m_aHost.set(aSynAbsURIRef, 948 aSynAuthority.makeStringAndClear(), 949 aSynAbsURIRef.getLength()); 950 // misusing m_aHost to store the authority 951 } 952 break; 953 } 954 955 case INET_PROT_VND_SUN_STAR_PKG: 956 { 957 if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '/') 958 { 959 setInvalid(); 960 return false; 961 } 962 aSynAbsURIRef.appendAscii(RTL_CONSTASCII_STRINGPARAM("//")); 963 rtl::OUStringBuffer aSynAuthority; 964 while (pPos < pEnd 965 && *pPos != '/' && *pPos != '?' 
966 && *pPos != nFragmentDelimiter) 967 { 968 EscapeType eEscapeType; 969 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, 970 cEscapePrefix, eMechanism, 971 eCharset, eEscapeType); 972 appendUCS4(aSynAuthority, nUTF32, eEscapeType, bOctets, 973 PART_AUTHORITY, cEscapePrefix, eCharset, 974 false); 975 } 976 if (aSynAuthority.getLength() == 0) 977 { 978 setInvalid(); 979 return false; 980 } 981 m_aHost.set(aSynAbsURIRef, 982 aSynAuthority.makeStringAndClear(), 983 aSynAbsURIRef.getLength()); 984 // misusing m_aHost to store the authority 985 break; 986 } 987 988 case INET_PROT_FILE: 989 if (bSmart) 990 { 991 // The first of the following seven productions that 992 // matches the rest of the input string (and for which the 993 // appropriate style bit is set in eStyle, if applicable) 994 // determines the used notation. The productions use the 995 // auxiliary rules 996 // 997 // domain = label *("." label) 998 // label = alphanum [*(alphanum / "-") alphanum] 999 // alphanum = ALPHA / DIGIT 1000 // IPv6reference = "[" IPv6address "]" 1001 // IPv6address = hexpart [":" IPv4address] 1002 // IPv4address = 1*3DIGIT 3("." 1*3DIGIT) 1003 // hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq]) 1004 // hexseq = hex4 *(":" hex4) 1005 // hex4 = 1*4HEXDIG 1006 // path = <any UCS4 character except "#"> 1007 // UCS4 = <any UCS4 character> 1008 1009 // 1st Production (URL): 1010 // "//" [domain / IPv6reference] ["/" *path] 1011 // ["#" *UCS4] 1012 // becomes 1013 // "file://" domain "/" *path ["#" *UCS4] 1014 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/') 1015 { 1016 sal_Unicode const * p1 = pPos + 2; 1017 while (p1 != pEnd && *p1 != '/' && 1018 *p1 != nFragmentDelimiter) 1019 { 1020 ++p1; 1021 } 1022 if (parseHostOrNetBiosName( 1023 pPos + 2, p1, bOctets, ENCODE_ALL, 1024 RTL_TEXTENCODING_DONTKNOW, true, NULL)) 1025 { 1026 aSynAbsURIRef. 
1027 appendAscii(RTL_CONSTASCII_STRINGPARAM("//")); 1028 pHostPortBegin = pPos + 2; 1029 pHostPortEnd = p1; 1030 pPos = p1; 1031 break; 1032 } 1033 } 1034 1035 // 2nd Production (MS IE generated 1; FSYS_DOS only): 1036 // "//" ALPHA ":" ["/" *path] ["#" *UCS4] 1037 // becomes 1038 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4] 1039 // replacing "\" by "/" within <*path> 1040 // 1041 // 3rd Production (MS IE generated 2; FSYS_DOS only): 1042 // "//" ALPHA ":" ["\" *path] ["#" *UCS4] 1043 // becomes 1044 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4] 1045 // replacing "\" by "/" within <*path> 1046 // 1047 // 4th Production (misscounted slashes): 1048 // "//" *path ["#" *UCS4] 1049 // becomes 1050 // "file:///" *path ["#" *UCS4] 1051 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/') 1052 { 1053 aSynAbsURIRef. 1054 appendAscii(RTL_CONSTASCII_STRINGPARAM("//")); 1055 pPos += 2; 1056 bSkippedInitialSlash = true; 1057 if ((eStyle & FSYS_DOS) != 0 1058 && pEnd - pPos >= 2 1059 && INetMIME::isAlpha(pPos[0]) 1060 && pPos[1] == ':' 1061 && (pEnd - pPos == 2 1062 || pPos[2] == '/' || pPos[2] == '\\')) 1063 nAltSegmentDelimiter = '\\'; 1064 break; 1065 } 1066 1067 // 5th Production (Unix): 1068 // "/" *path ["#" *UCS4] 1069 // becomes 1070 // "file:///" *path ["#" *UCS4] 1071 if (pPos < pEnd && *pPos == '/') 1072 { 1073 aSynAbsURIRef. 
1074 appendAscii(RTL_CONSTASCII_STRINGPARAM("//")); 1075 break; 1076 } 1077 1078 // 6th Production (UNC; FSYS_DOS only): 1079 // "\\" domain ["\" *path] ["#" *UCS4] 1080 // becomes 1081 // "file://" domain "/" *path ["#" *UCS4] 1082 // replacing "\" by "/" within <*path> 1083 if (eStyle & FSYS_DOS 1084 && pEnd - pPos >= 2 1085 && pPos[0] == '\\' 1086 && pPos[1] == '\\') 1087 { 1088 sal_Unicode const * p1 = pPos + 2; 1089 sal_Unicode const * pe = p1; 1090 while (pe < pEnd && *pe != '\\' && 1091 *pe != nFragmentDelimiter) 1092 { 1093 ++pe; 1094 } 1095 if ( 1096 parseHostOrNetBiosName( 1097 p1, pe, bOctets, ENCODE_ALL, 1098 RTL_TEXTENCODING_DONTKNOW, true, NULL) || 1099 (scanDomain(p1, pe) > 0 && p1 == pe) 1100 ) 1101 { 1102 aSynAbsURIRef. 1103 appendAscii(RTL_CONSTASCII_STRINGPARAM("//")); 1104 pHostPortBegin = pPos + 2; 1105 pHostPortEnd = pe; 1106 pPos = pe; 1107 nSegmentDelimiter = '\\'; 1108 break; 1109 } 1110 } 1111 1112 // 7th Production (Unix-like DOS; FSYS_DOS only): 1113 // ALPHA ":" ["/" *path] ["#" *UCS4] 1114 // becomes 1115 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4] 1116 // replacing "\" by "/" within <*path> 1117 // 1118 // 8th Production (DOS; FSYS_DOS only): 1119 // ALPHA ":" ["\" *path] ["#" *UCS4] 1120 // becomes 1121 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4] 1122 // replacing "\" by "/" within <*path> 1123 if (eStyle & FSYS_DOS 1124 && pEnd - pPos >= 2 1125 && INetMIME::isAlpha(pPos[0]) 1126 && pPos[1] == ':' 1127 && (pEnd - pPos == 2 1128 || pPos[2] == '/' 1129 || pPos[2] == '\\')) 1130 { 1131 aSynAbsURIRef. 1132 appendAscii(RTL_CONSTASCII_STRINGPARAM("//")); 1133 nAltSegmentDelimiter = '\\'; 1134 bSkippedInitialSlash = true; 1135 break; 1136 } 1137 1138 // 9th Production (any): 1139 // *path ["#" *UCS4] 1140 // becomes 1141 // "file:///" *path ["#" *UCS4] 1142 // replacing the delimiter by "/" within <*path>. 
The 1143 // delimiter is that character from the set { "/", "\", 1144 // ":" } which appears most often in <*path> (if FSYS_UNX 1145 // is not among the style bits, "/" is removed from the 1146 // set; if FSYS_DOS is not among the style bits, "\" is 1147 // removed from the set; if FSYS_MAC is not among the 1148 // style bits, ":" is removed from the set). If two or 1149 // more characters appear the same number of times, the 1150 // character mentioned first in that set is chosen. If 1151 // the first character of <*path> is the delimiter, that 1152 // character is not copied. 1153 if (eStyle & (FSYS_UNX | FSYS_DOS | FSYS_MAC)) 1154 { 1155 aSynAbsURIRef. 1156 appendAscii(RTL_CONSTASCII_STRINGPARAM("//")); 1157 switch (guessFSysStyleByCounting(pPos, pEnd, eStyle)) 1158 { 1159 case FSYS_UNX: 1160 nSegmentDelimiter = '/'; 1161 break; 1162 1163 case FSYS_DOS: 1164 nSegmentDelimiter = '\\'; 1165 break; 1166 1167 case FSYS_MAC: 1168 nSegmentDelimiter = ':'; 1169 break; 1170 1171 default: 1172 DBG_ERROR( 1173 "INetURLObject::setAbsURIRef():" 1174 " Bad guessFSysStyleByCounting"); 1175 break; 1176 } 1177 bSkippedInitialSlash 1178 = pPos != pEnd && *pPos != nSegmentDelimiter; 1179 break; 1180 } 1181 } 1182 default: 1183 { 1184 // For INET_PROT_FILE, allow an empty authority ("//") to be 1185 // missing if the following path starts with an explicit "/" 1186 // (Java is notorious in generating such file URLs, so be 1187 // liberal here): 1188 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/') 1189 pPos += 2; 1190 else if (!bSmart 1191 && !(m_eScheme == INET_PROT_FILE 1192 && pPos != pEnd && *pPos == '/')) 1193 { 1194 setInvalid(); 1195 return false; 1196 } 1197 aSynAbsURIRef.appendAscii(RTL_CONSTASCII_STRINGPARAM("//")); 1198 1199 sal_Unicode const * pAuthority = pPos; 1200 sal_uInt32 c = getSchemeInfo().m_bQuery ? '?' 
: 0x80000000; 1201 while (pPos < pEnd && *pPos != '/' && *pPos != c 1202 && *pPos != nFragmentDelimiter) 1203 ++pPos; 1204 if (getSchemeInfo().m_bUser) 1205 if (getSchemeInfo().m_bHost) 1206 { 1207 sal_Unicode const * p1 = pAuthority; 1208 while (p1 < pPos && *p1 != '@') 1209 ++p1; 1210 if (p1 == pPos) 1211 { 1212 pHostPortBegin = pAuthority; 1213 pHostPortEnd = pPos; 1214 } 1215 else 1216 { 1217 pUserInfoBegin = pAuthority; 1218 pUserInfoEnd = p1; 1219 pHostPortBegin = p1 + 1; 1220 pHostPortEnd = pPos; 1221 } 1222 } 1223 else 1224 { 1225 pUserInfoBegin = pAuthority; 1226 pUserInfoEnd = pPos; 1227 } 1228 else if (getSchemeInfo().m_bHost) 1229 { 1230 pHostPortBegin = pAuthority; 1231 pHostPortEnd = pPos; 1232 } 1233 else if (pPos != pAuthority) 1234 { 1235 setInvalid(); 1236 return false; 1237 } 1238 break; 1239 } 1240 } 1241 1242 if (pUserInfoBegin) 1243 { 1244 Part ePart = m_eScheme == INET_PROT_IMAP ? 1245 PART_IMAP_ACHAR : 1246 m_eScheme == INET_PROT_VIM ? 1247 PART_VIM : 1248 PART_USER_PASSWORD; 1249 bool bSupportsPassword = getSchemeInfo().m_bPassword; 1250 bool bSupportsAuth 1251 = !bSupportsPassword && getSchemeInfo().m_bAuth; 1252 bool bHasAuth = false; 1253 rtl::OUStringBuffer aSynUser; 1254 sal_Unicode const * p1 = pUserInfoBegin; 1255 while (p1 < pUserInfoEnd) 1256 { 1257 EscapeType eEscapeType; 1258 sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd, bOctets, 1259 cEscapePrefix, eMechanism, 1260 eCharset, eEscapeType); 1261 if (eEscapeType == ESCAPE_NO) 1262 { 1263 if (nUTF32 == ':' && bSupportsPassword) 1264 { 1265 bHasAuth = true; 1266 break; 1267 } 1268 else if (nUTF32 == ';' && bSupportsAuth 1269 && pUserInfoEnd - p1 1270 > RTL_CONSTASCII_LENGTH("auth=") 1271 && INetMIME::equalIgnoreCase( 1272 p1, 1273 p1 + RTL_CONSTASCII_LENGTH("auth="), 1274 "auth=")) 1275 { 1276 p1 += RTL_CONSTASCII_LENGTH("auth="); 1277 bHasAuth = true; 1278 break; 1279 } 1280 } 1281 appendUCS4(aSynUser, nUTF32, eEscapeType, bOctets, ePart, 1282 cEscapePrefix, eCharset, false); 1283 
} 1284 m_aUser.set(aSynAbsURIRef, aSynUser.makeStringAndClear(), 1285 aSynAbsURIRef.getLength()); 1286 if (bHasAuth) 1287 { 1288 if (bSupportsPassword) 1289 { 1290 aSynAbsURIRef.append(sal_Unicode(':')); 1291 rtl::OUStringBuffer aSynAuth; 1292 while (p1 < pUserInfoEnd) 1293 { 1294 EscapeType eEscapeType; 1295 sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd, bOctets, 1296 cEscapePrefix, 1297 eMechanism, eCharset, 1298 eEscapeType); 1299 appendUCS4(aSynAuth, nUTF32, eEscapeType, bOctets, 1300 ePart, cEscapePrefix, eCharset, false); 1301 } 1302 m_aAuth.set(aSynAbsURIRef, aSynAuth.makeStringAndClear(), 1303 aSynAbsURIRef.getLength()); 1304 } 1305 else 1306 { 1307 aSynAbsURIRef. 1308 appendAscii(RTL_CONSTASCII_STRINGPARAM(";AUTH=")); 1309 rtl::OUStringBuffer aSynAuth; 1310 while (p1 < pUserInfoEnd) 1311 { 1312 EscapeType eEscapeType; 1313 sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd, bOctets, 1314 cEscapePrefix, 1315 eMechanism, eCharset, 1316 eEscapeType); 1317 if (!INetMIME::isIMAPAtomChar(nUTF32)) 1318 { 1319 setInvalid(); 1320 return false; 1321 } 1322 appendUCS4(aSynAuth, nUTF32, eEscapeType, bOctets, 1323 ePart, cEscapePrefix, eCharset, false); 1324 } 1325 m_aAuth.set(aSynAbsURIRef, aSynAuth.makeStringAndClear(), 1326 aSynAbsURIRef.getLength()); 1327 } 1328 } 1329 if (pHostPortBegin) 1330 aSynAbsURIRef.append(sal_Unicode('@')); 1331 } 1332 1333 if (pHostPortBegin) 1334 { 1335 sal_Unicode const * pPort = pHostPortEnd; 1336 if ( getSchemeInfo().m_bPort && pHostPortBegin < pHostPortEnd ) 1337 { 1338 sal_Unicode const * p1 = pHostPortEnd - 1; 1339 while (p1 > pHostPortBegin && INetMIME::isDigit(*p1)) 1340 --p1; 1341 if (*p1 == ':') 1342 pPort = p1; 1343 } 1344 bool bNetBiosName = false; 1345 switch (m_eScheme) 1346 { 1347 case INET_PROT_FILE: 1348 // If the host equals "LOCALHOST" (unencoded and ignoring 1349 // case), turn it into an empty host: 1350 if (INetMIME::equalIgnoreCase(pHostPortBegin, pPort, 1351 "localhost")) 1352 pHostPortBegin = pPort; 1353 bNetBiosName 
= true; 1354 break; 1355 1356 case INET_PROT_LDAP: 1357 case INET_PROT_SMB: 1358 if (pHostPortBegin == pPort && pPort != pHostPortEnd) 1359 { 1360 setInvalid(); 1361 return false; 1362 } 1363 break; 1364 default: 1365 if (pHostPortBegin == pPort) 1366 { 1367 setInvalid(); 1368 return false; 1369 } 1370 break; 1371 } 1372 rtl::OUStringBuffer aSynHost; 1373 if (!parseHostOrNetBiosName( 1374 pHostPortBegin, pPort, bOctets, eMechanism, eCharset, 1375 bNetBiosName, &aSynHost)) 1376 { 1377 setInvalid(); 1378 return false; 1379 } 1380 m_aHost.set(aSynAbsURIRef, aSynHost.makeStringAndClear(), 1381 aSynAbsURIRef.getLength()); 1382 if (pPort != pHostPortEnd) 1383 { 1384 aSynAbsURIRef.append(sal_Unicode(':')); 1385 m_aPort.set(aSynAbsURIRef, 1386 rtl::OUString(pPort + 1, pHostPortEnd - (pPort + 1)), 1387 aSynAbsURIRef.getLength()); 1388 } 1389 } 1390 } 1391 1392 // Parse <path> 1393 rtl::OUStringBuffer aSynPath; 1394 if (!parsePath(m_eScheme, &pPos, pEnd, bOctets, eMechanism, eCharset, 1395 bSkippedInitialSlash, nSegmentDelimiter, 1396 nAltSegmentDelimiter, 1397 getSchemeInfo().m_bQuery ? '?' 
// Resolve a (possibly relative) URI reference against this object, which
// acts as the base URL.
//
// rTheRelURIRef     The input to resolve.
// bOctets, eMechanism, eCharset
//                   Forwarded to getUTF32()/appendUCS4() (and to the
//                   INetURLObject constructor / setAbsURIRef()) to control
//                   how the input is decoded and re-encoded.
// rTheAbsURIRef     Receives the resulting absolute URL; only assigned when
//                   true is returned.
// rWasAbsolute      Set to true if the input was taken as absolute (it had
//                   its own scheme, or was recognised as an absolute DOS/UNC
//                   path in smart mode); set to false otherwise, including
//                   on all failure paths past the scheme check.
// bIgnoreFragment   If true, the fragment of the input is not carried over
//                   into the result.  Note that it is not consulted on the
//                   early "absolute file system path" return below.
// bSmart            Enables the file system path heuristics.
// bRelativeNonURIs  Only consulted when bSmart is set and the input has no
//                   scheme: treat the input as a plain path, i.e. encode
//                   everything and recognise neither query nor fragment.
// eStyle            The file system notations the heuristics may assume.
//
// Returns true on success.  Returns false if the composed URL is invalid, if
// an input with its own scheme cannot be parsed, or if a relative path is
// given but the scheme of this object is not hierarchical.
//
// The value 0x80000000 is used for a delimiter that is switched off
// (presumably it is outside the range of any character it is compared
// with -- TODO confirm against the definition of sal_Unicode).
bool INetURLObject::convertRelToAbs(rtl::OUString const & rTheRelURIRef,
                                    bool bOctets,
                                    INetURLObject & rTheAbsURIRef,
                                    bool & rWasAbsolute,
                                    EncodeMechanism eMechanism,
                                    rtl_TextEncoding eCharset,
                                    bool bIgnoreFragment, bool bSmart,
                                    bool bRelativeNonURIs, FSysStyle eStyle)
    const
{
    sal_Unicode const * p = rTheRelURIRef.getStr();
    sal_Unicode const * pEnd = p + rTheRelURIRef.getLength();

    // Look for a scheme: first one of the known prefixes, then any generic
    // scheme.  p itself is not advanced here; only the copy pPrefixBegin is.
    sal_Unicode const * pPrefixBegin = p;
    PrefixInfo const * pPrefix = getPrefix(pPrefixBegin, pEnd);
    bool hasScheme = pPrefix != 0;
    if (!hasScheme) {
        pPrefixBegin = p;
        hasScheme = parseScheme(&pPrefixBegin, pEnd, '#').getLength() > 0;
    }

    sal_uInt32 nSegmentDelimiter = '/';
    // In smart mode "?" only delimits a query if the base scheme has
    // queries:
    sal_uInt32 nQueryDelimiter
        = !bSmart || getSchemeInfo().m_bQuery ? '?' : 0x80000000;
    sal_uInt32 nFragmentDelimiter = '#';
    Part ePart = PART_VISIBLE;

    if (!hasScheme && bSmart)
    {
        // If the input matches any of the following productions (for which
        // the appropriate style bit is set in eStyle), it is assumed to be an
        // absolute file system path, rather than a relative URI reference.
        // (This is only a subset of the productions used for scheme detection
        // in INetURLObject::setAbsURIRef(), because most of those productions
        // interfere with the syntax of relative URI references.)  The
        // productions use the auxiliary rules
        //
        //    domain = label *("." label)
        //    label = alphanum [*(alphanum / "-") alphanum]
        //    alphanum = ALPHA / DIGIT
        //    UCS4 = <any UCS4 character>
        //
        // 1st Production (UNC file; FSYS_DOS only):
        //    "\\" domain ["\" *UCS4]
        //
        // 2nd Production (Unix-like DOS file; FSYS_DOS only):
        //    ALPHA ":" ["/" *UCS4]
        //
        // 3rd Production (DOS file; FSYS_DOS only):
        //    ALPHA ":" ["\" *UCS4]
        if (eStyle & FSYS_DOS)
        {
            bool bFSys = false;
            sal_Unicode const * q = p;
            if (pEnd - q >= 2
                && INetMIME::isAlpha(q[0])
                && q[1] == ':'
                && (pEnd - q == 2 || q[2] == '/' || q[2] == '\\'))
                bFSys = true; // 2nd, 3rd
            else if (pEnd - q >= 2 && q[0] == '\\' && q[1] == '\\')
            {
                q += 2;
                // The host part runs up to the next "\" (or to the end):
                sal_Int32 n = rtl_ustr_indexOfChar_WithLength(
                    q, pEnd - q, '\\');
                sal_Unicode const * qe = n == -1 ? pEnd : q + n;
                if (parseHostOrNetBiosName(
                        q, qe, bOctets, ENCODE_ALL, RTL_TEXTENCODING_DONTKNOW,
                        true, NULL))
                {
                    bFSys = true; // 1st
                }
            }
            if (bFSys)
            {
                // Let the smart parser turn the path into a URL.  If that
                // fails, fall through and treat the input as relative.
                INetURLObject aNewURI;
                aNewURI.setAbsURIRef(rTheRelURIRef, bOctets, eMechanism,
                                     eCharset, true, eStyle);
                if (!aNewURI.HasError())
                {
                    rTheAbsURIRef = aNewURI;
                    rWasAbsolute = true;
                    return true;
                }
            }
        }

        // When the base URL is a file URL, accept relative file system paths
        // using "\" or ":" as delimiter (and ignoring URI conventions for "%"
        // and "#"), as well as relative URIs using "/" as delimiter:
        if (m_eScheme == INET_PROT_FILE)
            switch (guessFSysStyleByCounting(p, pEnd, eStyle))
            {
                case FSYS_UNX:
                    nSegmentDelimiter = '/';
                    break;

                case FSYS_DOS:
                    nSegmentDelimiter = '\\';
                    bRelativeNonURIs = true;
                    break;

                case FSYS_MAC:
                    nSegmentDelimiter = ':';
                    bRelativeNonURIs = true;
                    break;

                default:
                    DBG_ERROR("INetURLObject::convertRelToAbs():"
                              " Bad guessFSysStyleByCounting");
                    break;
            }

        if (bRelativeNonURIs)
        {
            // Plain path: encode everything, and switch off query and
            // fragment detection.
            eMechanism = ENCODE_ALL;
            nQueryDelimiter = 0x80000000;
            nFragmentDelimiter = 0x80000000;
            ePart = PART_VISIBLE_NONSPECIAL;
        }
    }

    // If the relative URI has the same scheme as the base URI, and that
    // scheme is hierarchical, then ignore its presence in the relative
    // URI in order to be backward compatible (cf. RFC 2396 section 5.2
    // step 3):
    if (pPrefix && pPrefix->m_eScheme == m_eScheme
        && getSchemeInfo().m_bHierarchical)
    {
        hasScheme = false;
        // Skip the input up to and including the first ":".
        while (p != pEnd && *p++ != ':') ;
    }
    rWasAbsolute = hasScheme;

    // Fast solution for non-relative URIs:
    if (hasScheme)
    {
        INetURLObject aNewURI(rTheRelURIRef, eMechanism, eCharset);
        if (aNewURI.HasError())
        {
            rWasAbsolute = false;
            return false;
        }

        if (bIgnoreFragment)
            aNewURI.clearFragment();
        rTheAbsURIRef = aNewURI;
        return true;
    }

    // From here on the result is composed piece by piece in aSynAbsURIRef.
    // eState records which part of the input comes next.
    enum State { STATE_AUTH, STATE_ABS_PATH, STATE_REL_PATH, STATE_FRAGMENT,
                 STATE_DONE };

    rtl::OUStringBuffer aSynAbsURIRef;
    // make sure that the scheme is copied for generic schemes: getSchemeInfo().m_pScheme
    // is empty ("") in that case, so take the scheme from m_aAbsURIRef
    if (m_eScheme != INET_PROT_GENERIC)
    {
        aSynAbsURIRef.appendAscii(getSchemeInfo().m_pScheme);
    }
    else
    {
        sal_Unicode const * pSchemeBegin
            = m_aAbsURIRef.getStr();
        sal_Unicode const * pSchemeEnd = pSchemeBegin;
        // NOTE(review): this loop has no end check; it relies on
        // m_aAbsURIRef containing a ":" whenever m_eScheme is
        // INET_PROT_GENERIC.
        while (pSchemeEnd[0] != ':')
        {
            ++pSchemeEnd;
        }
        aSynAbsURIRef.append(pSchemeBegin, pSchemeEnd - pSchemeBegin);
    }
    aSynAbsURIRef.append(sal_Unicode(':'));

    sal_Char cEscapePrefix = getEscapePrefix();

    State eState = STATE_AUTH;
    // Stays true only while nothing but a fragment (or nothing at all) has
    // been found in the input; in that case path and query of the base are
    // reused below.
    bool bSameDoc = true;

    if (getSchemeInfo().m_bAuthority)
    {
        if (pEnd - p >= 2 && p[0] == '/' && p[1] == '/')
        {
            // The input brings its own authority: copy it up to the next
            // unescaped segment or fragment delimiter.  getUTF32() has
            // already moved p past that delimiter when the loop is left.
            aSynAbsURIRef.appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
            p += 2;
            eState = STATE_ABS_PATH;
            bSameDoc = false;
            while (p != pEnd)
            {
                EscapeType eEscapeType;
                sal_uInt32 nUTF32
                    = getUTF32(p, pEnd, bOctets, cEscapePrefix, eMechanism,
                               eCharset, eEscapeType);
                if (eEscapeType == ESCAPE_NO)
                {
                    if (nUTF32 == nSegmentDelimiter)
                        break;
                    else if (nUTF32 == nFragmentDelimiter)
                    {
                        eState = STATE_FRAGMENT;
                        break;
                    }
                }
                appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets,
                           PART_VISIBLE, cEscapePrefix, eCharset, true);
            }
        }
        else
        {
            // No authority in the input: take over the one of the base.
            SubString aAuthority(getAuthority());
            aSynAbsURIRef.append(m_aAbsURIRef.getStr()
                                     + aAuthority.getBegin(),
                                 aAuthority.getLength());
        }
    }

    // No authority was read from the input, so decide by its next character
    // what follows:
    if (eState == STATE_AUTH)
    {
        if (p == pEnd)
            eState = STATE_DONE;
        else if (*p == nFragmentDelimiter)
        {
            ++p;
            eState = STATE_FRAGMENT;
        }
        else if (*p == nSegmentDelimiter)
        {
            ++p;
            eState = STATE_ABS_PATH;
            bSameDoc = false;
        }
        else
        {
            eState = STATE_REL_PATH;
            bSameDoc = false;
        }
    }

    if (eState == STATE_ABS_PATH)
    {
        // Absolute path: copy the rest of the input up to an unescaped
        // fragment delimiter, replacing unescaped segment delimiters by "/".
        aSynAbsURIRef.append(sal_Unicode('/'));
        eState = STATE_DONE;
        while (p != pEnd)
        {
            EscapeType eEscapeType;
            sal_uInt32 nUTF32
                = getUTF32(p, pEnd, bOctets, cEscapePrefix, eMechanism,
                           eCharset, eEscapeType);
            if (eEscapeType == ESCAPE_NO)
            {
                if (nUTF32 == nFragmentDelimiter)
                {
                    eState = STATE_FRAGMENT;
                    break;
                }
                else if (nUTF32 == nSegmentDelimiter)
                    nUTF32 = '/';
            }
            appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets, ePart,
                       cEscapePrefix, eCharset, true);
        }
    }
    else if (eState == STATE_REL_PATH)
    {
        if (!getSchemeInfo().m_bHierarchical)
        {
            // Detect cases where a relative input could not be made absolute
            // because the given base URL is broken (most probably because it is
            // empty):
            OSL_ASSERT(!HasError());
            rWasAbsolute = false;
            return false;
        }

        // Start from the base path up to and including its last "/":
        sal_Unicode const * pBasePathBegin
            = m_aAbsURIRef.getStr() + m_aPath.getBegin();
        sal_Unicode const * pBasePathEnd
            = pBasePathBegin + m_aPath.getLength();
        while (pBasePathEnd != pBasePathBegin)
            if (*(--pBasePathEnd) == '/')
            {
                ++pBasePathEnd;
                break;
            }

        sal_Int32 nPathBegin = aSynAbsURIRef.getLength();
        aSynAbsURIRef.append(pBasePathBegin, pBasePathEnd - pBasePathBegin);
        DBG_ASSERT(aSynAbsURIRef.getLength() > nPathBegin
                 && aSynAbsURIRef.charAt(aSynAbsURIRef.getLength() - 1) == '/',
                 "INetURLObject::convertRelToAbs(): Bad base path");

        // Append the input's segments one at a time, resolving "." and "..":
        while (p != pEnd && *p != nQueryDelimiter && *p != nFragmentDelimiter)
        {
            if (*p == '.')
            {
                if (pEnd - p == 1
                    || p[1] == nSegmentDelimiter
                    || p[1] == nQueryDelimiter
                    || p[1] == nFragmentDelimiter)
                {
                    // "." segment: drop it.
                    ++p;
                    if (p != pEnd && *p == nSegmentDelimiter)
                        ++p;
                    continue;
                }
                else if (pEnd - p >= 2
                         && p[1] == '.'
                         && (pEnd - p == 2
                             || p[2] == nSegmentDelimiter
                             || p[2] == nQueryDelimiter
                             || p[2] == nFragmentDelimiter)
                         && aSynAbsURIRef.getLength() - nPathBegin > 1)
                {
                    // ".." segment while the path built so far is longer
                    // than "/": remove the last segment.  (Otherwise the
                    // ".." is copied literally by the code below.)
                    p += 2;
                    if (p != pEnd && *p == nSegmentDelimiter)
                        ++p;

                    sal_Int32 i = aSynAbsURIRef.getLength() - 2;
                    while (i > nPathBegin && aSynAbsURIRef.charAt(i) != '/')
                        --i;
                    aSynAbsURIRef.setLength(i + 1);
                    DBG_ASSERT(
                        aSynAbsURIRef.getLength() > nPathBegin
                        && aSynAbsURIRef.charAt(aSynAbsURIRef.getLength() - 1)
                               == '/',
                        "INetURLObject::convertRelToAbs(): Bad base path");
                    continue;
                }
            }

            // Ordinary segment: copy it, then turn a following segment
            // delimiter into "/".
            while (p != pEnd
                   && *p != nSegmentDelimiter
                   && *p != nQueryDelimiter
                   && *p != nFragmentDelimiter)
            {
                EscapeType eEscapeType;
                sal_uInt32 nUTF32
                    = getUTF32(p, pEnd, bOctets, cEscapePrefix, eMechanism,
                               eCharset, eEscapeType);
                appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets, ePart,
                           cEscapePrefix, eCharset, true);
            }
            if (p != pEnd && *p == nSegmentDelimiter)
            {
                aSynAbsURIRef.append(sal_Unicode('/'));
                ++p;
            }
        }

        // Copy whatever is left before the fragment (i.e., a query):
        while (p != pEnd && *p != nFragmentDelimiter)
        {
            EscapeType eEscapeType;
            sal_uInt32 nUTF32
                = getUTF32(p, pEnd, bOctets, cEscapePrefix, eMechanism,
                           eCharset, eEscapeType);
            appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets, ePart,
                       cEscapePrefix, eCharset, true);
        }

        if (p == pEnd)
            eState = STATE_DONE;
        else
        {
            ++p;
            eState = STATE_FRAGMENT;
        }
    }
    else if (bSameDoc)
    {
        // The input is empty or consists of a fragment only: keep path and
        // query of the base.  The query is copied together with the
        // character in front of it (hence the "- 1" / "+ 1").
        aSynAbsURIRef.append(m_aAbsURIRef.getStr() + m_aPath.getBegin(),
                             m_aPath.getLength());
        if (m_aQuery.isPresent())
            aSynAbsURIRef.append(m_aAbsURIRef.getStr()
                                     + m_aQuery.getBegin() - 1,
                                 m_aQuery.getLength() + 1);
    }

    if (eState == STATE_FRAGMENT && !bIgnoreFragment)
    {
        // p already points past the fragment delimiter here.
        aSynAbsURIRef.append(sal_Unicode('#'));
        while (p != pEnd)
        {
            EscapeType eEscapeType;
            sal_uInt32 nUTF32
                = getUTF32(p, pEnd, bOctets, cEscapePrefix, eMechanism,
                           eCharset, eEscapeType);
            appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets,
                       PART_VISIBLE, cEscapePrefix, eCharset, true);
        }
    }

    // Parse the composed string to validate it and to obtain the result:
    INetURLObject aNewURI(aSynAbsURIRef.makeStringAndClear());
    if (aNewURI.HasError())
    {
        // Detect cases where a relative input could not be made absolute
        // because the given base URL is broken (most probably because it is
        // empty):
        OSL_ASSERT(!HasError());
        rWasAbsolute = false;
        return false;
    }

    rTheAbsURIRef = aNewURI;
    return true;
}
eCharset); 1871 return false; 1872 } 1873 1874 // Convert the input (absolute or relative URI ref) to an absolute URI 1875 // ref: 1876 INetURLObject aSubject; 1877 bool bWasAbsolute; 1878 if (!convertRelToAbs(rTheAbsURIRef, bOctets, aSubject, bWasAbsolute, 1879 eEncodeMechanism, eCharset, false, false, false, 1880 eStyle)) 1881 { 1882 rTheRelURIRef 1883 = decode(rTheAbsURIRef, 1884 getEscapePrefix(CompareProtocolScheme(rTheAbsURIRef)), 1885 eDecodeMechanism, eCharset); 1886 return false; 1887 } 1888 1889 // Check for differing scheme or authority parts: 1890 if ((m_aScheme.compare( 1891 aSubject.m_aScheme, m_aAbsURIRef, aSubject.m_aAbsURIRef) 1892 != 0) 1893 || (m_aUser.compare( 1894 aSubject.m_aUser, m_aAbsURIRef, aSubject.m_aAbsURIRef) 1895 != 0) 1896 || (m_aAuth.compare( 1897 aSubject.m_aAuth, m_aAbsURIRef, aSubject.m_aAbsURIRef) 1898 != 0) 1899 || (m_aHost.compare( 1900 aSubject.m_aHost, m_aAbsURIRef, aSubject.m_aAbsURIRef) 1901 != 0) 1902 || (m_aPort.compare( 1903 aSubject.m_aPort, m_aAbsURIRef, aSubject.m_aAbsURIRef) 1904 != 0)) 1905 { 1906 rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset); 1907 return false; 1908 } 1909 1910 sal_Unicode const * pBasePathBegin 1911 = m_aAbsURIRef.getStr() + m_aPath.getBegin(); 1912 sal_Unicode const * pBasePathEnd = pBasePathBegin + m_aPath.getLength(); 1913 sal_Unicode const * pSubjectPathBegin 1914 = aSubject.m_aAbsURIRef.getStr() + aSubject.m_aPath.getBegin(); 1915 sal_Unicode const * pSubjectPathEnd 1916 = pSubjectPathBegin + aSubject.m_aPath.getLength(); 1917 1918 // Make nMatch point past the last matching slash, or past the end of the 1919 // paths, in case they are equal: 1920 sal_Unicode const * pSlash = 0; 1921 sal_Unicode const * p1 = pBasePathBegin; 1922 sal_Unicode const * p2 = pSubjectPathBegin; 1923 for (;;) 1924 { 1925 if (p1 == pBasePathEnd || p2 == pSubjectPathEnd) 1926 { 1927 if (p1 == pBasePathEnd && p2 == pSubjectPathEnd) 1928 pSlash = p1; 1929 break; 1930 } 1931 1932 sal_Unicode c = *p1++; 
1933 if (c != *p2++) 1934 break; 1935 if (c == '/') 1936 pSlash = p1; 1937 } 1938 if (!pSlash) 1939 { 1940 // One of the paths does not start with '/': 1941 rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset); 1942 return false; 1943 } 1944 sal_Int32 nMatch = pSlash - pBasePathBegin; 1945 1946 // If the two URLs are DOS file URLs starting with different volumes 1947 // (e.g., file:///a:/... and file:///b:/...), the subject is not made 1948 // relative (it could be, but some people do not like that): 1949 if (m_eScheme == INET_PROT_FILE 1950 && nMatch <= 1 1951 && hasDosVolume(eStyle) 1952 && aSubject.hasDosVolume(eStyle)) //TODO! ok to use eStyle for these? 1953 { 1954 rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset); 1955 return false; 1956 } 1957 1958 // For every slash in the base path after nMatch, a prefix of "../" is 1959 // added to the new relative URL (if the common prefix of the two paths is 1960 // only "/"---but see handling of file URLs above---, the complete subject 1961 // path could go into the new relative URL instead, but some people don't 1962 // like that): 1963 rtl::OUStringBuffer aSynRelURIRef; 1964 // if (nMatch <= 1) nMatch = 0; else // see comment above 1965 for (sal_Unicode const * p = pBasePathBegin + nMatch; p != pBasePathEnd; 1966 ++p) 1967 { 1968 if (*p == '/') 1969 aSynRelURIRef.appendAscii(RTL_CONSTASCII_STRINGPARAM("../")); 1970 } 1971 1972 // If the new relative URL would start with "//" (i.e., it would be 1973 // mistaken for a relative URL starting with an authority part), or if the 1974 // new relative URL would neither be empty nor start with <"/"> nor start 1975 // with <1*rseg> (i.e., it could be mistaken for an absolute URL starting 1976 // with a scheme part), then the new relative URL is prefixed with "./": 1977 if (aSynRelURIRef.getLength() == 0) 1978 { 1979 if (pSubjectPathEnd - pSubjectPathBegin >= nMatch + 2 1980 && pSubjectPathBegin[nMatch] == '/' 1981 && pSubjectPathBegin[nMatch + 1] == 
'/') 1982 { 1983 aSynRelURIRef.appendAscii(RTL_CONSTASCII_STRINGPARAM("./")); 1984 } 1985 else 1986 { 1987 for (sal_Unicode const * p = pSubjectPathBegin + nMatch; 1988 p != pSubjectPathEnd && *p != '/'; ++p) 1989 { 1990 if (mustEncode(*p, PART_REL_SEGMENT_EXTRA)) 1991 { 1992 aSynRelURIRef. 1993 appendAscii(RTL_CONSTASCII_STRINGPARAM("./")); 1994 break; 1995 } 1996 } 1997 } 1998 } 1999 2000 // The remainder of the subject path, starting at nMatch, is appended to 2001 // the new relative URL: 2002 sal_Char cEscapePrefix = getEscapePrefix(); 2003 aSynRelURIRef.append(decode(pSubjectPathBegin + nMatch, pSubjectPathEnd, 2004 cEscapePrefix, eDecodeMechanism, eCharset)); 2005 2006 // If the subject has defined query or fragment parts, they are appended 2007 // to the new relative URL: 2008 if (aSubject.m_aQuery.isPresent()) 2009 { 2010 aSynRelURIRef.append(sal_Unicode('?')); 2011 aSynRelURIRef.append(aSubject.decode(aSubject.m_aQuery, cEscapePrefix, 2012 eDecodeMechanism, eCharset)); 2013 } 2014 if (aSubject.m_aFragment.isPresent()) 2015 { 2016 aSynRelURIRef.append(sal_Unicode('#')); 2017 aSynRelURIRef.append(aSubject.decode(aSubject.m_aFragment, 2018 cEscapePrefix, eDecodeMechanism, eCharset)); 2019 } 2020 2021 rTheRelURIRef = aSynRelURIRef.makeStringAndClear(); 2022 return true; 2023 } 2024 2025 //============================================================================ 2026 // static 2027 bool INetURLObject::convertIntToExt(rtl::OUString const & rTheIntURIRef, 2028 bool bOctets, rtl::OUString & rTheExtURIRef, 2029 DecodeMechanism eDecodeMechanism, 2030 rtl_TextEncoding eCharset) 2031 { 2032 sal_Char cEscapePrefix 2033 = getEscapePrefix(CompareProtocolScheme(rTheIntURIRef)); 2034 rtl::OUString aSynExtURIRef(encodeText(rTheIntURIRef, bOctets, PART_VISIBLE, 2035 cEscapePrefix, NOT_CANONIC, eCharset, 2036 true)); 2037 sal_Unicode const * pBegin = aSynExtURIRef.getStr(); 2038 sal_Unicode const * pEnd = pBegin + aSynExtURIRef.getLength(); 2039 sal_Unicode const * p = 
pBegin; 2040 PrefixInfo const * pPrefix = getPrefix(p, pEnd); 2041 bool bConvert = pPrefix && pPrefix->m_eKind == PrefixInfo::INTERNAL; 2042 if (bConvert) 2043 { 2044 aSynExtURIRef = 2045 aSynExtURIRef.replaceAt(0, p - pBegin, 2046 rtl::OUString::createFromAscii(pPrefix->m_pTranslatedPrefix)); 2047 } 2048 rTheExtURIRef = decode(aSynExtURIRef, cEscapePrefix, eDecodeMechanism, 2049 eCharset); 2050 return bConvert; 2051 } 2052 2053 //============================================================================ 2054 // static 2055 bool INetURLObject::convertExtToInt(rtl::OUString const & rTheExtURIRef, 2056 bool bOctets, rtl::OUString & rTheIntURIRef, 2057 DecodeMechanism eDecodeMechanism, 2058 rtl_TextEncoding eCharset) 2059 { 2060 sal_Char cEscapePrefix 2061 = getEscapePrefix(CompareProtocolScheme(rTheExtURIRef)); 2062 rtl::OUString aSynIntURIRef(encodeText(rTheExtURIRef, bOctets, PART_VISIBLE, 2063 cEscapePrefix, NOT_CANONIC, eCharset, 2064 true)); 2065 sal_Unicode const * pBegin = aSynIntURIRef.getStr(); 2066 sal_Unicode const * pEnd = pBegin + aSynIntURIRef.getLength(); 2067 sal_Unicode const * p = pBegin; 2068 PrefixInfo const * pPrefix = getPrefix(p, pEnd); 2069 bool bConvert = pPrefix && pPrefix->m_eKind == PrefixInfo::EXTERNAL; 2070 if (bConvert) 2071 { 2072 aSynIntURIRef = 2073 aSynIntURIRef.replaceAt(0, p - pBegin, 2074 rtl::OUString::createFromAscii(pPrefix->m_pTranslatedPrefix)); 2075 } 2076 rTheIntURIRef = decode(aSynIntURIRef, cEscapePrefix, eDecodeMechanism, 2077 eCharset); 2078 return bConvert; 2079 } 2080 2081 //============================================================================ 2082 // static 2083 INetURLObject::PrefixInfo const * 2084 INetURLObject::getPrefix(sal_Unicode const *& rBegin, 2085 sal_Unicode const * pEnd) 2086 { 2087 static PrefixInfo const aMap[] 2088 = { // dummy entry at front needed, because pLast may point here: 2089 { 0, 0, INET_PROT_NOT_VALID, PrefixInfo::INTERNAL }, 2090 { ".component:", "staroffice.component:", 
INET_PROT_COMPONENT, 2091 PrefixInfo::INTERNAL }, 2092 { ".uno:", "staroffice.uno:", INET_PROT_UNO, 2093 PrefixInfo::INTERNAL }, 2094 { "cid:", 0, INET_PROT_CID, PrefixInfo::OFFICIAL }, 2095 { "data:", 0, INET_PROT_DATA, PrefixInfo::OFFICIAL }, 2096 { "db:", "staroffice.db:", INET_PROT_DB, PrefixInfo::INTERNAL }, 2097 { "file:", 0, INET_PROT_FILE, PrefixInfo::OFFICIAL }, 2098 { "ftp:", 0, INET_PROT_FTP, PrefixInfo::OFFICIAL }, 2099 { "hid:", "staroffice.hid:", INET_PROT_HID, 2100 PrefixInfo::INTERNAL }, 2101 { "http:", 0, INET_PROT_HTTP, PrefixInfo::OFFICIAL }, 2102 { "https:", 0, INET_PROT_HTTPS, PrefixInfo::OFFICIAL }, 2103 { "imap:", 0, INET_PROT_IMAP, PrefixInfo::OFFICIAL }, 2104 { "javascript:", 0, INET_PROT_JAVASCRIPT, PrefixInfo::OFFICIAL }, 2105 { "ldap:", 0, INET_PROT_LDAP, PrefixInfo::OFFICIAL }, 2106 { "macro:", "staroffice.macro:", INET_PROT_MACRO, 2107 PrefixInfo::INTERNAL }, 2108 { "mailto:", 0, INET_PROT_MAILTO, PrefixInfo::OFFICIAL }, 2109 { "news:", 0, INET_PROT_NEWS, PrefixInfo::OFFICIAL }, 2110 { "out:", "staroffice.out:", INET_PROT_OUT, 2111 PrefixInfo::INTERNAL }, 2112 { "pop3:", "staroffice.pop3:", INET_PROT_POP3, 2113 PrefixInfo::INTERNAL }, 2114 { "private:", "staroffice.private:", INET_PROT_PRIV_SOFFICE, 2115 PrefixInfo::INTERNAL }, 2116 { "private:factory/", "staroffice.factory:", 2117 INET_PROT_PRIV_SOFFICE, PrefixInfo::INTERNAL }, 2118 { "private:helpid/", "staroffice.helpid:", INET_PROT_PRIV_SOFFICE, 2119 PrefixInfo::INTERNAL }, 2120 { "private:java/", "staroffice.java:", INET_PROT_PRIV_SOFFICE, 2121 PrefixInfo::INTERNAL }, 2122 { "private:searchfolder:", "staroffice.searchfolder:", 2123 INET_PROT_PRIV_SOFFICE, PrefixInfo::INTERNAL }, 2124 { "private:trashcan:", "staroffice.trashcan:", 2125 INET_PROT_PRIV_SOFFICE, PrefixInfo::INTERNAL }, 2126 { "slot:", "staroffice.slot:", INET_PROT_SLOT, 2127 PrefixInfo::INTERNAL }, 2128 { "smb:", 0, INET_PROT_SMB, PrefixInfo::OFFICIAL }, 2129 { "staroffice.component:", ".component:", 
INET_PROT_COMPONENT, 2130 PrefixInfo::EXTERNAL }, 2131 { "staroffice.db:", "db:", INET_PROT_DB, PrefixInfo::EXTERNAL }, 2132 { "staroffice.factory:", "private:factory/", 2133 INET_PROT_PRIV_SOFFICE, PrefixInfo::EXTERNAL }, 2134 { "staroffice.helpid:", "private:helpid/", INET_PROT_PRIV_SOFFICE, 2135 PrefixInfo::EXTERNAL }, 2136 { "staroffice.hid:", "hid:", INET_PROT_HID, 2137 PrefixInfo::EXTERNAL }, 2138 { "staroffice.java:", "private:java/", INET_PROT_PRIV_SOFFICE, 2139 PrefixInfo::EXTERNAL }, 2140 { "staroffice.macro:", "macro:", INET_PROT_MACRO, 2141 PrefixInfo::EXTERNAL }, 2142 { "staroffice.out:", "out:", INET_PROT_OUT, 2143 PrefixInfo::EXTERNAL }, 2144 { "staroffice.pop3:", "pop3:", INET_PROT_POP3, 2145 PrefixInfo::EXTERNAL }, 2146 { "staroffice.private:", "private:", INET_PROT_PRIV_SOFFICE, 2147 PrefixInfo::EXTERNAL }, 2148 { "staroffice.searchfolder:", "private:searchfolder:", 2149 INET_PROT_PRIV_SOFFICE, PrefixInfo::EXTERNAL }, 2150 { "staroffice.slot:", "slot:", INET_PROT_SLOT, 2151 PrefixInfo::EXTERNAL }, 2152 { "staroffice.trashcan:", "private:trashcan:", 2153 INET_PROT_PRIV_SOFFICE, PrefixInfo::EXTERNAL }, 2154 { "staroffice.uno:", ".uno:", INET_PROT_UNO, 2155 PrefixInfo::EXTERNAL }, 2156 { "staroffice.vim:", "vim:", INET_PROT_VIM, 2157 PrefixInfo::EXTERNAL }, 2158 { "staroffice:", "private:", INET_PROT_PRIV_SOFFICE, 2159 PrefixInfo::EXTERNAL }, 2160 { "telnet:", 0, INET_PROT_TELNET, PrefixInfo::OFFICIAL }, 2161 { "vim:", "staroffice.vim:", INET_PROT_VIM, 2162 PrefixInfo::INTERNAL }, 2163 { "vnd.sun.star.cmd:", 0, INET_PROT_VND_SUN_STAR_CMD, 2164 PrefixInfo::OFFICIAL }, 2165 { "vnd.sun.star.expand:", 0, INET_PROT_VND_SUN_STAR_EXPAND, 2166 PrefixInfo::OFFICIAL }, 2167 { "vnd.sun.star.help:", 0, INET_PROT_VND_SUN_STAR_HELP, 2168 PrefixInfo::OFFICIAL }, 2169 { "vnd.sun.star.hier:", 0, INET_PROT_VND_SUN_STAR_HIER, 2170 PrefixInfo::OFFICIAL }, 2171 { "vnd.sun.star.odma:", 0, INET_PROT_VND_SUN_STAR_ODMA, 2172 PrefixInfo::OFFICIAL }, 2173 { 
"vnd.sun.star.pkg:", 0, INET_PROT_VND_SUN_STAR_PKG, 2174 PrefixInfo::OFFICIAL }, 2175 { "vnd.sun.star.tdoc:", 0, INET_PROT_VND_SUN_STAR_TDOC, 2176 PrefixInfo::OFFICIAL }, 2177 { "vnd.sun.star.webdav:", 0, INET_PROT_VND_SUN_STAR_WEBDAV, 2178 PrefixInfo::OFFICIAL } }; 2179 PrefixInfo const * pFirst = aMap + 1; 2180 PrefixInfo const * pLast = aMap + sizeof aMap / sizeof (PrefixInfo) - 1; 2181 PrefixInfo const * pMatch = 0; 2182 sal_Unicode const * pMatched = rBegin; 2183 sal_Unicode const * p = rBegin; 2184 sal_Int32 i = 0; 2185 for (; pFirst < pLast; ++i) 2186 { 2187 if (pFirst->m_pPrefix[i] == '\0') 2188 { 2189 pMatch = pFirst++; 2190 pMatched = p; 2191 } 2192 if (p >= pEnd) 2193 break; 2194 sal_uInt32 nChar = INetMIME::toLowerCase(*p++); 2195 while (pFirst <= pLast && sal_uChar(pFirst->m_pPrefix[i]) < nChar) 2196 ++pFirst; 2197 while (pFirst <= pLast && sal_uChar(pLast->m_pPrefix[i]) > nChar) 2198 --pLast; 2199 } 2200 if (pFirst == pLast) 2201 { 2202 sal_Char const * q = pFirst->m_pPrefix + i; 2203 while (p < pEnd && *q != '\0' 2204 && INetMIME::toLowerCase(*p) == sal_uChar(*q)) 2205 { 2206 ++p; 2207 ++q; 2208 } 2209 if (*q == '\0') 2210 { 2211 rBegin = p; 2212 return pFirst; 2213 } 2214 } 2215 rBegin = pMatched; 2216 return pMatch; 2217 } 2218 2219 //============================================================================ 2220 sal_Int32 INetURLObject::getAuthorityBegin() const 2221 { 2222 DBG_ASSERT(getSchemeInfo().m_bAuthority, 2223 "INetURLObject::getAuthority(): Bad scheme"); 2224 sal_Int32 nBegin; 2225 if (m_aUser.isPresent()) 2226 nBegin = m_aUser.getBegin(); 2227 else if (m_aHost.isPresent()) 2228 nBegin = m_aHost.getBegin(); 2229 else 2230 nBegin = m_aPath.getBegin(); 2231 nBegin -= RTL_CONSTASCII_LENGTH("//"); 2232 DBG_ASSERT(m_aAbsURIRef.charAt(nBegin) == '/' 2233 && m_aAbsURIRef.charAt(nBegin + 1) == '/', 2234 "INetURLObject::getAuthority(): Bad authority"); 2235 return nBegin; 2236 } 2237 2238 
//============================================================================ 2239 INetURLObject::SubString INetURLObject::getAuthority() const 2240 { 2241 sal_Int32 nBegin = getAuthorityBegin(); 2242 sal_Int32 nEnd = m_aPort.isPresent() ? m_aPort.getEnd() : 2243 m_aHost.isPresent() ? m_aHost.getEnd() : 2244 m_aAuth.isPresent() ? m_aAuth.getEnd() : 2245 m_aUser.isPresent() ? m_aUser.getEnd() : 2246 nBegin + RTL_CONSTASCII_LENGTH("//"); 2247 return SubString(nBegin, nEnd - nBegin); 2248 } 2249 2250 //============================================================================ 2251 bool INetURLObject::setUser(rtl::OUString const & rTheUser, 2252 bool bOctets, EncodeMechanism eMechanism, 2253 rtl_TextEncoding eCharset) 2254 { 2255 if ( 2256 !getSchemeInfo().m_bUser || 2257 (m_eScheme == INET_PROT_IMAP && rTheUser.getLength() == 0) 2258 ) 2259 { 2260 return false; 2261 } 2262 2263 rtl::OUString aNewUser(encodeText(rTheUser, bOctets, 2264 m_eScheme == INET_PROT_IMAP ? 2265 PART_IMAP_ACHAR : 2266 m_eScheme == INET_PROT_VIM ? 
2267 PART_VIM : 2268 PART_USER_PASSWORD, 2269 getEscapePrefix(), eMechanism, eCharset, 2270 false)); 2271 sal_Int32 nDelta; 2272 if (m_aUser.isPresent()) 2273 nDelta = m_aUser.set(m_aAbsURIRef, aNewUser); 2274 else if (m_aHost.isPresent()) 2275 { 2276 m_aAbsURIRef.insert(m_aHost.getBegin(), sal_Unicode('@')); 2277 nDelta = m_aUser.set(m_aAbsURIRef, aNewUser, m_aHost.getBegin()) + 1; 2278 } 2279 else if (getSchemeInfo().m_bHost) 2280 return false; 2281 else 2282 nDelta = m_aUser.set(m_aAbsURIRef, aNewUser, m_aPath.getBegin()); 2283 m_aAuth += nDelta; 2284 m_aHost += nDelta; 2285 m_aPort += nDelta; 2286 m_aPath += nDelta; 2287 m_aQuery += nDelta; 2288 m_aFragment += nDelta; 2289 return true; 2290 } 2291 2292 namespace 2293 { 2294 void lcl_Erase(rtl::OUStringBuffer &rBuf, sal_Int32 index, sal_Int32 count) 2295 { 2296 rtl::OUString sTemp(rBuf.makeStringAndClear()); 2297 rBuf.append(sTemp.replaceAt(index, count, rtl::OUString())); 2298 } 2299 } 2300 2301 //============================================================================ 2302 bool INetURLObject::clearPassword() 2303 { 2304 if (!getSchemeInfo().m_bPassword) 2305 return false; 2306 if (m_aAuth.isPresent()) 2307 { 2308 lcl_Erase(m_aAbsURIRef, m_aAuth.getBegin() - 1, 2309 m_aAuth.getLength() + 1); 2310 sal_Int32 nDelta = m_aAuth.clear() - 1; 2311 m_aHost += nDelta; 2312 m_aPort += nDelta; 2313 m_aPath += nDelta; 2314 m_aQuery += nDelta; 2315 m_aFragment += nDelta; 2316 } 2317 return true; 2318 } 2319 2320 //============================================================================ 2321 bool INetURLObject::setPassword(rtl::OUString const & rThePassword, 2322 bool bOctets, EncodeMechanism eMechanism, 2323 rtl_TextEncoding eCharset) 2324 { 2325 if (!getSchemeInfo().m_bPassword) 2326 return false; 2327 rtl::OUString aNewAuth(encodeText(rThePassword, bOctets, 2328 m_eScheme == INET_PROT_VIM ? 
2329 PART_VIM : PART_USER_PASSWORD, 2330 getEscapePrefix(), eMechanism, eCharset, 2331 false)); 2332 sal_Int32 nDelta; 2333 if (m_aAuth.isPresent()) 2334 nDelta = m_aAuth.set(m_aAbsURIRef, aNewAuth); 2335 else if (m_aUser.isPresent()) 2336 { 2337 m_aAbsURIRef.insert(m_aUser.getEnd(), sal_Unicode(':')); 2338 nDelta 2339 = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aUser.getEnd() + 1) + 1; 2340 } 2341 else if (m_aHost.isPresent()) 2342 { 2343 m_aAbsURIRef.insert(m_aHost.getBegin(), 2344 rtl::OUString::createFromAscii(":@")); 2345 m_aUser.set(m_aAbsURIRef, rtl::OUString(), m_aHost.getBegin()); 2346 nDelta 2347 = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aHost.getBegin() + 1) + 2; 2348 } 2349 else if (getSchemeInfo().m_bHost) 2350 return false; 2351 else 2352 { 2353 m_aAbsURIRef.insert(m_aPath.getBegin(), sal_Unicode(':')); 2354 m_aUser.set(m_aAbsURIRef, rtl::OUString(), m_aPath.getBegin()); 2355 nDelta 2356 = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aPath.getBegin() + 1) + 1; 2357 } 2358 m_aHost += nDelta; 2359 m_aPort += nDelta; 2360 m_aPath += nDelta; 2361 m_aQuery += nDelta; 2362 m_aFragment += nDelta; 2363 return true; 2364 } 2365 2366 //============================================================================ 2367 // static 2368 bool INetURLObject::parseHost( 2369 sal_Unicode const *& rBegin, sal_Unicode const * pEnd, 2370 rtl::OUString & rCanonic) 2371 { 2372 // RFC 2373 is inconsistent about how to write an IPv6 address in which an 2373 // IPv4 address directly follows the abbreviating "::". The ABNF in 2374 // Appendix B suggests ":::13.1.68.3", while an example in 2.2/3 explicitly 2375 // mentions "::13:1.68.3". 
This algorithm accepts both variants: 2376 enum State { STATE_INITIAL, STATE_LABEL, STATE_LABEL_HYPHEN, 2377 STATE_LABEL_DOT, STATE_TOPLABEL, STATE_TOPLABEL_HYPHEN, 2378 STATE_TOPLABEL_DOT, STATE_IP4, STATE_IP4_DOT, STATE_IP6, 2379 STATE_IP6_COLON, STATE_IP6_2COLON, STATE_IP6_3COLON, 2380 STATE_IP6_HEXSEQ1, STATE_IP6_HEXSEQ1_COLON, 2381 STATE_IP6_HEXSEQ1_MAYBE_IP4, STATE_IP6_HEXSEQ2, 2382 STATE_IP6_HEXSEQ2_COLON, STATE_IP6_HEXSEQ2_MAYBE_IP4, 2383 STATE_IP6_IP4, STATE_IP6_IP4_DOT, STATE_IP6_DONE }; 2384 rtl::OUStringBuffer aTheCanonic; 2385 sal_uInt32 nNumber = 0; 2386 int nDigits = 0; 2387 int nOctets = 0; 2388 State eState = STATE_INITIAL; 2389 sal_Unicode const * p = rBegin; 2390 for (; p != pEnd; ++p) 2391 switch (eState) 2392 { 2393 case STATE_INITIAL: 2394 if (*p == '[') 2395 { 2396 aTheCanonic.append(sal_Unicode('[')); 2397 eState = STATE_IP6; 2398 } 2399 else if (INetMIME::isAlpha(*p)) 2400 eState = STATE_TOPLABEL; 2401 else if (INetMIME::isDigit(*p)) 2402 { 2403 nNumber = INetMIME::getWeight(*p); 2404 nDigits = 1; 2405 nOctets = 1; 2406 eState = STATE_IP4; 2407 } 2408 else 2409 goto done; 2410 break; 2411 2412 case STATE_LABEL: 2413 if (*p == '.') 2414 eState = STATE_LABEL_DOT; 2415 else if (*p == '-') 2416 eState = STATE_LABEL_HYPHEN; 2417 else if (!INetMIME::isAlphanumeric(*p)) 2418 goto done; 2419 break; 2420 2421 case STATE_LABEL_HYPHEN: 2422 if (INetMIME::isAlphanumeric(*p)) 2423 eState = STATE_LABEL; 2424 else if (*p != '-') 2425 goto done; 2426 break; 2427 2428 case STATE_LABEL_DOT: 2429 if (INetMIME::isAlpha(*p)) 2430 eState = STATE_TOPLABEL; 2431 else if (INetMIME::isDigit(*p)) 2432 eState = STATE_LABEL; 2433 else 2434 goto done; 2435 break; 2436 2437 case STATE_TOPLABEL: 2438 if (*p == '.') 2439 eState = STATE_TOPLABEL_DOT; 2440 else if (*p == '-') 2441 eState = STATE_TOPLABEL_HYPHEN; 2442 else if (!INetMIME::isAlphanumeric(*p)) 2443 goto done; 2444 break; 2445 2446 case STATE_TOPLABEL_HYPHEN: 2447 if (INetMIME::isAlphanumeric(*p)) 2448 eState = 
STATE_TOPLABEL; 2449 else if (*p != '-') 2450 goto done; 2451 break; 2452 2453 case STATE_TOPLABEL_DOT: 2454 if (INetMIME::isAlpha(*p)) 2455 eState = STATE_TOPLABEL; 2456 else if (INetMIME::isDigit(*p)) 2457 eState = STATE_LABEL; 2458 else 2459 goto done; 2460 break; 2461 2462 case STATE_IP4: 2463 if (*p == '.') 2464 if (nOctets < 4) 2465 { 2466 aTheCanonic.append( 2467 rtl::OUString::valueOf(sal_Int32(nNumber))); 2468 aTheCanonic.append(sal_Unicode('.')); 2469 ++nOctets; 2470 eState = STATE_IP4_DOT; 2471 } 2472 else 2473 eState = STATE_LABEL_DOT; 2474 else if (*p == '-') 2475 eState = STATE_LABEL_HYPHEN; 2476 else if (INetMIME::isAlpha(*p)) 2477 eState = STATE_LABEL; 2478 else if (INetMIME::isDigit(*p)) 2479 if (nDigits < 3) 2480 { 2481 nNumber = 10 * nNumber + INetMIME::getWeight(*p); 2482 ++nDigits; 2483 } 2484 else 2485 eState = STATE_LABEL; 2486 else 2487 goto done; 2488 break; 2489 2490 case STATE_IP4_DOT: 2491 if (INetMIME::isAlpha(*p)) 2492 eState = STATE_TOPLABEL; 2493 else if (INetMIME::isDigit(*p)) 2494 { 2495 nNumber = INetMIME::getWeight(*p); 2496 nDigits = 1; 2497 eState = STATE_IP4; 2498 } 2499 else 2500 goto done; 2501 break; 2502 2503 case STATE_IP6: 2504 if (*p == ':') 2505 eState = STATE_IP6_COLON; 2506 else if (INetMIME::isHexDigit(*p)) 2507 { 2508 nNumber = INetMIME::getHexWeight(*p); 2509 nDigits = 1; 2510 eState = STATE_IP6_HEXSEQ1; 2511 } 2512 else 2513 goto done; 2514 break; 2515 2516 case STATE_IP6_COLON: 2517 if (*p == ':') 2518 { 2519 aTheCanonic.appendAscii(RTL_CONSTASCII_STRINGPARAM("::")); 2520 eState = STATE_IP6_2COLON; 2521 } 2522 else 2523 goto done; 2524 break; 2525 2526 case STATE_IP6_2COLON: 2527 if (*p == ']') 2528 eState = STATE_IP6_DONE; 2529 else if (*p == ':') 2530 { 2531 aTheCanonic.append(sal_Unicode(':')); 2532 eState = STATE_IP6_3COLON; 2533 } 2534 else if (INetMIME::isDigit(*p)) 2535 { 2536 nNumber = INetMIME::getWeight(*p); 2537 nDigits = 1; 2538 eState = STATE_IP6_HEXSEQ2_MAYBE_IP4; 2539 } 2540 else if 
(INetMIME::isHexDigit(*p)) 2541 { 2542 nNumber = INetMIME::getHexWeight(*p); 2543 nDigits = 1; 2544 eState = STATE_IP6_HEXSEQ2; 2545 } 2546 else 2547 goto done; 2548 break; 2549 2550 case STATE_IP6_3COLON: 2551 if (INetMIME::isDigit(*p)) 2552 { 2553 nNumber = INetMIME::getWeight(*p); 2554 nDigits = 1; 2555 nOctets = 1; 2556 eState = STATE_IP6_IP4; 2557 } 2558 else 2559 goto done; 2560 break; 2561 2562 case STATE_IP6_HEXSEQ1: 2563 if (*p == ']') 2564 { 2565 aTheCanonic.append( 2566 rtl::OUString::valueOf(sal_Int32(nNumber), 16)); 2567 eState = STATE_IP6_DONE; 2568 } 2569 else if (*p == ':') 2570 { 2571 aTheCanonic.append( 2572 rtl::OUString::valueOf(sal_Int32(nNumber), 16)); 2573 aTheCanonic.append(sal_Unicode(':')); 2574 eState = STATE_IP6_HEXSEQ1_COLON; 2575 } 2576 else if (INetMIME::isHexDigit(*p) && nDigits < 4) 2577 { 2578 nNumber = 16 * nNumber + INetMIME::getHexWeight(*p); 2579 ++nDigits; 2580 } 2581 else 2582 goto done; 2583 break; 2584 2585 case STATE_IP6_HEXSEQ1_COLON: 2586 if (*p == ':') 2587 { 2588 aTheCanonic.append(sal_Unicode(':')); 2589 eState = STATE_IP6_2COLON; 2590 } 2591 else if (INetMIME::isDigit(*p)) 2592 { 2593 nNumber = INetMIME::getWeight(*p); 2594 nDigits = 1; 2595 eState = STATE_IP6_HEXSEQ1_MAYBE_IP4; 2596 } 2597 else if (INetMIME::isHexDigit(*p)) 2598 { 2599 nNumber = INetMIME::getHexWeight(*p); 2600 nDigits = 1; 2601 eState = STATE_IP6_HEXSEQ1; 2602 } 2603 else 2604 goto done; 2605 break; 2606 2607 case STATE_IP6_HEXSEQ1_MAYBE_IP4: 2608 if (*p == ']') 2609 { 2610 aTheCanonic.append( 2611 rtl::OUString::valueOf(sal_Int32(nNumber), 16)); 2612 eState = STATE_IP6_DONE; 2613 } 2614 else if (*p == ':') 2615 { 2616 aTheCanonic.append( 2617 rtl::OUString::valueOf(sal_Int32(nNumber), 16)); 2618 aTheCanonic.append(sal_Unicode(':')); 2619 eState = STATE_IP6_HEXSEQ1_COLON; 2620 } 2621 else if (*p == '.') 2622 { 2623 nNumber = 100 * (nNumber >> 8) + 10 * (nNumber >> 4 & 15) 2624 + (nNumber & 15); 2625 aTheCanonic.append( 2626 
rtl::OUString::valueOf(sal_Int32(nNumber))); 2627 aTheCanonic.append(sal_Unicode('.')); 2628 nOctets = 2; 2629 eState = STATE_IP6_IP4_DOT; 2630 } 2631 else if (INetMIME::isDigit(*p) && nDigits < 3) 2632 { 2633 nNumber = 16 * nNumber + INetMIME::getWeight(*p); 2634 ++nDigits; 2635 } 2636 else if (INetMIME::isHexDigit(*p) && nDigits < 4) 2637 { 2638 nNumber = 16 * nNumber + INetMIME::getHexWeight(*p); 2639 ++nDigits; 2640 eState = STATE_IP6_HEXSEQ1; 2641 } 2642 else 2643 goto done; 2644 break; 2645 2646 case STATE_IP6_HEXSEQ2: 2647 if (*p == ']') 2648 { 2649 aTheCanonic.append( 2650 rtl::OUString::valueOf(sal_Int32(nNumber), 16)); 2651 eState = STATE_IP6_DONE; 2652 } 2653 else if (*p == ':') 2654 { 2655 aTheCanonic.append( 2656 rtl::OUString::valueOf(sal_Int32(nNumber), 16)); 2657 aTheCanonic.append(sal_Unicode(':')); 2658 eState = STATE_IP6_HEXSEQ2_COLON; 2659 } 2660 else if (INetMIME::isHexDigit(*p) && nDigits < 4) 2661 { 2662 nNumber = 16 * nNumber + INetMIME::getHexWeight(*p); 2663 ++nDigits; 2664 } 2665 else 2666 goto done; 2667 break; 2668 2669 case STATE_IP6_HEXSEQ2_COLON: 2670 if (INetMIME::isDigit(*p)) 2671 { 2672 nNumber = INetMIME::getWeight(*p); 2673 nDigits = 1; 2674 eState = STATE_IP6_HEXSEQ2_MAYBE_IP4; 2675 } 2676 else if (INetMIME::isHexDigit(*p)) 2677 { 2678 nNumber = INetMIME::getHexWeight(*p); 2679 nDigits = 1; 2680 eState = STATE_IP6_HEXSEQ2; 2681 } 2682 else 2683 goto done; 2684 break; 2685 2686 case STATE_IP6_HEXSEQ2_MAYBE_IP4: 2687 if (*p == ']') 2688 { 2689 aTheCanonic.append( 2690 rtl::OUString::valueOf(sal_Int32(nNumber), 16)); 2691 eState = STATE_IP6_DONE; 2692 } 2693 else if (*p == ':') 2694 { 2695 aTheCanonic.append( 2696 rtl::OUString::valueOf(sal_Int32(nNumber), 16)); 2697 aTheCanonic.append(sal_Unicode(':')); 2698 eState = STATE_IP6_HEXSEQ2_COLON; 2699 } 2700 else if (*p == '.') 2701 { 2702 nNumber = 100 * (nNumber >> 8) + 10 * (nNumber >> 4 & 15) 2703 + (nNumber & 15); 2704 aTheCanonic.append( 2705 
rtl::OUString::valueOf(sal_Int32(nNumber))); 2706 aTheCanonic.append(sal_Unicode('.')); 2707 nOctets = 2; 2708 eState = STATE_IP6_IP4_DOT; 2709 } 2710 else if (INetMIME::isDigit(*p) && nDigits < 3) 2711 { 2712 nNumber = 16 * nNumber + INetMIME::getWeight(*p); 2713 ++nDigits; 2714 } 2715 else if (INetMIME::isHexDigit(*p) && nDigits < 4) 2716 { 2717 nNumber = 16 * nNumber + INetMIME::getHexWeight(*p); 2718 ++nDigits; 2719 eState = STATE_IP6_HEXSEQ2; 2720 } 2721 else 2722 goto done; 2723 break; 2724 2725 case STATE_IP6_IP4: 2726 if (*p == ']') 2727 if (nOctets == 4) 2728 { 2729 aTheCanonic.append( 2730 rtl::OUString::valueOf(sal_Int32(nNumber))); 2731 eState = STATE_IP6_DONE; 2732 } 2733 else 2734 goto done; 2735 else if (*p == '.') 2736 if (nOctets < 4) 2737 { 2738 aTheCanonic.append( 2739 rtl::OUString::valueOf(sal_Int32(nNumber))); 2740 aTheCanonic.append(sal_Unicode('.')); 2741 ++nOctets; 2742 eState = STATE_IP6_IP4_DOT; 2743 } 2744 else 2745 goto done; 2746 else if (INetMIME::isDigit(*p) && nDigits < 3) 2747 { 2748 nNumber = 10 * nNumber + INetMIME::getWeight(*p); 2749 ++nDigits; 2750 } 2751 else 2752 goto done; 2753 break; 2754 2755 case STATE_IP6_IP4_DOT: 2756 if (INetMIME::isDigit(*p)) 2757 { 2758 nNumber = INetMIME::getWeight(*p); 2759 nDigits = 1; 2760 eState = STATE_IP6_IP4; 2761 } 2762 else 2763 goto done; 2764 break; 2765 2766 case STATE_IP6_DONE: 2767 goto done; 2768 } 2769 done: 2770 switch (eState) 2771 { 2772 case STATE_LABEL: 2773 case STATE_TOPLABEL: 2774 case STATE_TOPLABEL_DOT: 2775 aTheCanonic.setLength(0); 2776 aTheCanonic.append(rBegin, p - rBegin); 2777 rBegin = p; 2778 rCanonic = aTheCanonic.makeStringAndClear(); 2779 return true; 2780 2781 case STATE_IP4: 2782 if (nOctets == 4) 2783 { 2784 aTheCanonic.append( 2785 rtl::OUString::valueOf(sal_Int32(nNumber))); 2786 rBegin = p; 2787 rCanonic = aTheCanonic.makeStringAndClear(); 2788 return true; 2789 } 2790 return false; 2791 2792 case STATE_IP6_DONE: 2793 aTheCanonic.append(sal_Unicode(']')); 
2794 rBegin = p; 2795 rCanonic = aTheCanonic.makeStringAndClear(); 2796 return true; 2797 2798 default: 2799 return false; 2800 } 2801 } 2802 2803 //============================================================================ 2804 // static 2805 bool INetURLObject::parseHostOrNetBiosName( 2806 sal_Unicode const * pBegin, sal_Unicode const * pEnd, bool bOctets, 2807 EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bNetBiosName, 2808 rtl::OUStringBuffer* pCanonic) 2809 { 2810 rtl::OUString aTheCanonic; 2811 if (pBegin < pEnd) 2812 { 2813 sal_Unicode const * p = pBegin; 2814 if (!parseHost(p, pEnd, aTheCanonic) || p != pEnd) 2815 { 2816 if (bNetBiosName) 2817 { 2818 rtl::OUStringBuffer buf; 2819 while (pBegin < pEnd) 2820 { 2821 EscapeType eEscapeType; 2822 sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd, bOctets, '%', 2823 eMechanism, eCharset, 2824 eEscapeType); 2825 if (!INetMIME::isVisible(nUTF32)) 2826 return false; 2827 if (!INetMIME::isAlphanumeric(nUTF32)) 2828 switch (nUTF32) 2829 { 2830 case '"': 2831 case '*': 2832 case '+': 2833 case ',': 2834 case '/': 2835 case ':': 2836 case ';': 2837 case '<': 2838 case '=': 2839 case '>': 2840 case '?': 2841 case '[': 2842 case '\\': 2843 case ']': 2844 case '`': 2845 case '|': 2846 return false;; 2847 } 2848 if (pCanonic != NULL) { 2849 appendUCS4( 2850 buf, nUTF32, eEscapeType, bOctets, PART_URIC, '%', 2851 eCharset, true); 2852 } 2853 } 2854 aTheCanonic = buf.makeStringAndClear(); 2855 } 2856 else 2857 return false; 2858 } 2859 } 2860 if (pCanonic != NULL) { 2861 *pCanonic = aTheCanonic; 2862 } 2863 return true; 2864 } 2865 2866 //============================================================================ 2867 // static 2868 rtl::OUString INetURLObject::encodeHostPort(rtl::OUString const & rTheHostPort, 2869 bool bOctets, 2870 EncodeMechanism eMechanism, 2871 rtl_TextEncoding eCharset) 2872 { 2873 sal_Int32 nPort = rTheHostPort.getLength(); 2874 if (nPort != 0) 2875 { 2876 sal_Int32 i = nPort - 1; 2877 while 
(i != 0 && INetMIME::isDigit(rTheHostPort.getStr()[i])) 2878 --i; 2879 if (rTheHostPort.getStr()[i] == ':') 2880 nPort = i; 2881 } 2882 rtl::OUString aResult(encodeText(rTheHostPort.copy(0, nPort), bOctets, 2883 PART_HOST_EXTRA, '%', eMechanism, eCharset, 2884 true)); 2885 aResult += rTheHostPort.copy(nPort); 2886 return aResult; 2887 } 2888 2889 //============================================================================ 2890 bool INetURLObject::setHost(rtl::OUString const & rTheHost, bool bOctets, 2891 EncodeMechanism eMechanism, 2892 rtl_TextEncoding eCharset) 2893 { 2894 if (!getSchemeInfo().m_bHost) 2895 return false; 2896 rtl::OUStringBuffer aSynHost(rTheHost); 2897 bool bNetBiosName = false; 2898 switch (m_eScheme) 2899 { 2900 case INET_PROT_FILE: 2901 { 2902 rtl::OUString sTemp(aSynHost); 2903 if (sTemp.equalsIgnoreAsciiCaseAsciiL( 2904 RTL_CONSTASCII_STRINGPARAM("localhost"))) 2905 { 2906 aSynHost.setLength(0); 2907 } 2908 bNetBiosName = true; 2909 } 2910 break; 2911 case INET_PROT_LDAP: 2912 if (aSynHost.getLength() == 0 && m_aPort.isPresent()) 2913 return false; 2914 break; 2915 2916 default: 2917 if (aSynHost.getLength() == 0) 2918 return false; 2919 break; 2920 } 2921 if (!parseHostOrNetBiosName( 2922 aSynHost.getStr(), aSynHost.getStr() + aSynHost.getLength(), 2923 bOctets, eMechanism, eCharset, bNetBiosName, &aSynHost)) 2924 return false; 2925 sal_Int32 nDelta = m_aHost.set(m_aAbsURIRef, aSynHost.makeStringAndClear()); 2926 m_aPort += nDelta; 2927 m_aPath += nDelta; 2928 m_aQuery += nDelta; 2929 m_aFragment += nDelta; 2930 return true; 2931 } 2932 2933 //============================================================================ 2934 // static 2935 bool INetURLObject::parsePath(INetProtocol eScheme, 2936 sal_Unicode const ** pBegin, 2937 sal_Unicode const * pEnd, 2938 bool bOctets, 2939 EncodeMechanism eMechanism, 2940 rtl_TextEncoding eCharset, 2941 bool bSkippedInitialSlash, 2942 sal_uInt32 nSegmentDelimiter, 2943 sal_uInt32 
nAltSegmentDelimiter, 2944 sal_uInt32 nQueryDelimiter, 2945 sal_uInt32 nFragmentDelimiter, 2946 rtl::OUStringBuffer &rSynPath) 2947 { 2948 DBG_ASSERT(pBegin, "INetURLObject::parsePath(): Null output param"); 2949 2950 sal_Unicode const * pPos = *pBegin; 2951 rtl::OUStringBuffer aTheSynPath; 2952 2953 switch (eScheme) 2954 { 2955 case INET_PROT_NOT_VALID: 2956 return false; 2957 2958 case INET_PROT_FTP: 2959 case INET_PROT_IMAP: 2960 if (pPos < pEnd && *pPos != '/') 2961 return false; 2962 while (pPos < pEnd && *pPos != nFragmentDelimiter) 2963 { 2964 EscapeType eEscapeType; 2965 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, 2966 '%', eMechanism, 2967 eCharset, eEscapeType); 2968 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets, 2969 PART_HTTP_PATH, '%', eCharset, true); 2970 } 2971 if (aTheSynPath.getLength() == 0) 2972 aTheSynPath.append(sal_Unicode('/')); 2973 break; 2974 2975 case INET_PROT_HTTP: 2976 case INET_PROT_VND_SUN_STAR_WEBDAV: 2977 case INET_PROT_HTTPS: 2978 case INET_PROT_SMB: 2979 if (pPos < pEnd && *pPos != '/') 2980 return false; 2981 while (pPos < pEnd && *pPos != nQueryDelimiter 2982 && *pPos != nFragmentDelimiter) 2983 { 2984 EscapeType eEscapeType; 2985 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, 2986 '%', eMechanism, 2987 eCharset, eEscapeType); 2988 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets, 2989 PART_HTTP_PATH, '%', eCharset, true); 2990 } 2991 if (aTheSynPath.getLength() == 0) 2992 aTheSynPath.append(sal_Unicode('/')); 2993 break; 2994 2995 case INET_PROT_FILE: 2996 { 2997 if (bSkippedInitialSlash) 2998 aTheSynPath.append(sal_Unicode('/')); 2999 else if (pPos < pEnd 3000 && *pPos != nSegmentDelimiter 3001 && *pPos != nAltSegmentDelimiter) 3002 return false; 3003 while (pPos < pEnd && *pPos != nFragmentDelimiter) 3004 { 3005 EscapeType eEscapeType; 3006 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, 3007 '%', eMechanism, 3008 eCharset, eEscapeType); 3009 if (eEscapeType == ESCAPE_NO) 3010 { 3011 if (nUTF32 == 
nSegmentDelimiter 3012 || nUTF32 == nAltSegmentDelimiter) 3013 { 3014 aTheSynPath.append(sal_Unicode('/')); 3015 continue; 3016 } 3017 else if (nUTF32 == '|' 3018 && (pPos == pEnd 3019 || *pPos == nFragmentDelimiter 3020 || *pPos == nSegmentDelimiter 3021 || *pPos == nAltSegmentDelimiter) 3022 && aTheSynPath.getLength() == 2 3023 && INetMIME::isAlpha(aTheSynPath.charAt(1))) 3024 { 3025 // A first segment of <ALPHA "|"> is translated to 3026 // <ALPHA ":">: 3027 aTheSynPath.append(sal_Unicode(':')); 3028 continue; 3029 } 3030 } 3031 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets, 3032 PART_PCHAR, '%', eCharset, true); 3033 } 3034 if (aTheSynPath.getLength() == 0) 3035 aTheSynPath.append(sal_Unicode('/')); 3036 break; 3037 } 3038 3039 case INET_PROT_MAILTO: 3040 while (pPos < pEnd && *pPos != nQueryDelimiter 3041 && *pPos != nFragmentDelimiter) 3042 { 3043 EscapeType eEscapeType; 3044 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, 3045 '%', eMechanism, 3046 eCharset, eEscapeType); 3047 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets, 3048 PART_MAILTO, '%', eCharset, true); 3049 } 3050 break; 3051 3052 case INET_PROT_NEWS: 3053 if (pPos == pEnd || *pPos == nQueryDelimiter 3054 || *pPos == nFragmentDelimiter) 3055 return false; 3056 3057 // Match <"*">: 3058 if (*pPos == '*' 3059 && (pEnd - pPos == 1 || pPos[1] == nQueryDelimiter 3060 || pPos[1] == nFragmentDelimiter)) 3061 { 3062 ++pPos; 3063 aTheSynPath.append(sal_Unicode('*')); 3064 break; 3065 } 3066 3067 // Match <group>: 3068 if (INetMIME::isAlpha(*pPos)) 3069 for (sal_Unicode const * p = pPos + 1;; ++p) 3070 if (p == pEnd || *p == nQueryDelimiter 3071 || *p == nFragmentDelimiter) 3072 { 3073 aTheSynPath.setLength(0); 3074 aTheSynPath.append(pPos, p - pPos); 3075 pPos = p; 3076 goto done; 3077 } 3078 else if (!INetMIME::isAlphanumeric(*p) && *p != '+' 3079 && *p != '-' && *p != '.' 
&& *p != '_') 3080 break; 3081 3082 // Match <article>: 3083 for (;;) 3084 { 3085 if (pPos == pEnd || *pPos == nQueryDelimiter 3086 || *pPos == nFragmentDelimiter) 3087 return false; 3088 if (*pPos == '@') 3089 break; 3090 EscapeType eEscapeType; 3091 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, '%', 3092 eMechanism, eCharset, eEscapeType); 3093 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets, 3094 PART_NEWS_ARTICLE_LOCALPART, '%', eCharset, true); 3095 } 3096 if (aTheSynPath.getLength() == 0) 3097 return false; 3098 ++pPos; 3099 aTheSynPath.append(sal_Unicode('@')); 3100 { 3101 sal_Unicode const * p = pPos; 3102 while (p < pEnd && *pPos != nQueryDelimiter 3103 && *pPos != nFragmentDelimiter) 3104 ++p; 3105 rtl::OUString aCanonic; 3106 if (!parseHost(pPos, p, aCanonic)) 3107 return false; 3108 aTheSynPath.append(aCanonic); 3109 } 3110 3111 done: 3112 break; 3113 3114 case INET_PROT_POP3: 3115 while (pPos < pEnd && *pPos != nFragmentDelimiter) 3116 { 3117 EscapeType eEscapeType; 3118 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, 3119 '%', eMechanism, 3120 eCharset, eEscapeType); 3121 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets, 3122 PART_MESSAGE_ID_PATH, '%', eCharset, 3123 true); 3124 } 3125 break; 3126 3127 case INET_PROT_PRIV_SOFFICE: 3128 case INET_PROT_SLOT: 3129 case INET_PROT_HID: 3130 case INET_PROT_MACRO: 3131 case INET_PROT_UNO: 3132 case INET_PROT_COMPONENT: 3133 case INET_PROT_LDAP: 3134 while (pPos < pEnd && *pPos != nQueryDelimiter 3135 && *pPos != nFragmentDelimiter) 3136 { 3137 EscapeType eEscapeType; 3138 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, 3139 '%', eMechanism, 3140 eCharset, eEscapeType); 3141 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets, 3142 PART_PATH_BEFORE_QUERY, '%', eCharset, 3143 true); 3144 } 3145 break; 3146 3147 case INET_PROT_VND_SUN_STAR_HELP: 3148 if (pPos == pEnd 3149 || *pPos == nQueryDelimiter 3150 || *pPos == nFragmentDelimiter) 3151 aTheSynPath.append(sal_Unicode('/')); 3152 else 3153 
{ 3154 if (*pPos != '/') 3155 return false; 3156 while (pPos < pEnd && *pPos != nQueryDelimiter 3157 && *pPos != nFragmentDelimiter) 3158 { 3159 EscapeType eEscapeType; 3160 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, 3161 '%', eMechanism, 3162 eCharset, eEscapeType); 3163 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets, 3164 PART_HTTP_PATH, '%', eCharset, true); 3165 } 3166 } 3167 break; 3168 3169 case INET_PROT_JAVASCRIPT: 3170 case INET_PROT_DATA: 3171 case INET_PROT_CID: 3172 case INET_PROT_DB: 3173 while (pPos < pEnd && *pPos != nFragmentDelimiter) 3174 { 3175 EscapeType eEscapeType; 3176 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, 3177 '%', eMechanism, 3178 eCharset, eEscapeType); 3179 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets, 3180 PART_URIC, '%', eCharset, true); 3181 } 3182 break; 3183 3184 case INET_PROT_OUT: 3185 if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '~') 3186 return false; 3187 aTheSynPath.appendAscii(RTL_CONSTASCII_STRINGPARAM("/~")); 3188 while (pPos < pEnd && *pPos != nFragmentDelimiter) 3189 { 3190 EscapeType eEscapeType; 3191 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, 3192 '%', eMechanism, 3193 eCharset, eEscapeType); 3194 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets, 3195 PART_URIC, '%', eCharset, true); 3196 } 3197 break; 3198 3199 case INET_PROT_VND_SUN_STAR_HIER: 3200 case INET_PROT_VND_SUN_STAR_PKG: 3201 if (pPos < pEnd && *pPos != '/' 3202 && *pPos != nQueryDelimiter && *pPos != nFragmentDelimiter) 3203 return false; 3204 while (pPos < pEnd && *pPos != nQueryDelimiter 3205 && *pPos != nFragmentDelimiter) 3206 { 3207 EscapeType eEscapeType; 3208 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, 3209 '%', eMechanism, 3210 eCharset, eEscapeType); 3211 if (eEscapeType == ESCAPE_NO && nUTF32 == '/') 3212 aTheSynPath.append(sal_Unicode('/')); 3213 else 3214 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets, 3215 PART_PCHAR, '%', eCharset, false); 3216 } 3217 if (aTheSynPath.getLength() == 
0) 3218 aTheSynPath.append(sal_Unicode('/')); 3219 break; 3220 3221 case INET_PROT_VIM: 3222 { 3223 /* test had to be taken out to make parsePath static; ok since INET_PROT_VIM is 3224 obsolete, anyway 3225 if (m_aUser.isEmpty()) 3226 return false; 3227 */ 3228 sal_Unicode const * pPathEnd = pPos; 3229 while (pPathEnd < pEnd && *pPathEnd != nFragmentDelimiter) 3230 ++pPathEnd; 3231 aTheSynPath.append(sal_Unicode('/')); 3232 if (pPos == pPathEnd) 3233 break; 3234 else if (*pPos++ != '/') 3235 return false; 3236 if (pPos == pPathEnd) 3237 break; 3238 while (pPos < pPathEnd && *pPos != '/') 3239 { 3240 EscapeType eEscapeType; 3241 sal_uInt32 nUTF32 = getUTF32(pPos, pPathEnd, bOctets, 3242 '=', eMechanism, 3243 eCharset, eEscapeType); 3244 appendUCS4(aTheSynPath, 3245 eEscapeType == ESCAPE_NO ? 3246 INetMIME::toLowerCase(nUTF32) : nUTF32, 3247 eEscapeType, bOctets, PART_VIM, '=', 3248 eCharset, false); 3249 } 3250 bool bInbox; 3251 rtl::OUString sCompare(aTheSynPath); 3252 if (sCompare.equalsAscii("/inbox")) 3253 bInbox = true; 3254 else if (sCompare.equalsAscii("/newsgroups")) 3255 bInbox = false; 3256 else 3257 return false; 3258 aTheSynPath.append(sal_Unicode('/')); 3259 if (pPos == pPathEnd) 3260 break; 3261 else if (*pPos++ != '/') 3262 return false; 3263 if (!bInbox) 3264 { 3265 bool bEmpty = true; 3266 while (pPos < pPathEnd && *pPos != '/') 3267 { 3268 EscapeType eEscapeType; 3269 sal_uInt32 nUTF32 = getUTF32(pPos, pPathEnd, bOctets, 3270 '=', eMechanism, 3271 eCharset, eEscapeType); 3272 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets, 3273 PART_VIM, '=', eCharset, false); 3274 bEmpty = false; 3275 } 3276 if (bEmpty) 3277 return false; 3278 aTheSynPath.append(sal_Unicode('/')); 3279 if (pPos == pPathEnd) 3280 break; 3281 else if (*pPos++ != '/') 3282 return false; 3283 } 3284 bool bEmpty = true; 3285 while (pPos < pPathEnd && *pPos != ':') 3286 { 3287 EscapeType eEscapeType; 3288 sal_uInt32 nUTF32 = getUTF32(pPos, pPathEnd, bOctets, 3289 '=', eMechanism, 
3290 eCharset, eEscapeType); 3291 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets, 3292 PART_VIM, '=', eCharset, false); 3293 bEmpty = false; 3294 } 3295 if (bEmpty) 3296 return false; 3297 if (pPos == pPathEnd) 3298 break; 3299 else if (*pPos++ != ':') 3300 return false; 3301 aTheSynPath.append(sal_Unicode(':')); 3302 for (int i = 0; i < 3; ++i) 3303 { 3304 if (i != 0) 3305 { 3306 if (pPos == pPathEnd || *pPos++ != '.') 3307 return false; 3308 aTheSynPath.append(sal_Unicode('.')); 3309 } 3310 bEmpty = true; 3311 while (pPos < pPathEnd && *pPos != '.') 3312 { 3313 EscapeType eEscapeType; 3314 sal_uInt32 nUTF32 = getUTF32(pPos, pPathEnd, bOctets, 3315 '=', eMechanism, 3316 eCharset, eEscapeType); 3317 if (!INetMIME::isDigit(nUTF32)) 3318 return false; 3319 aTheSynPath.append(sal_Unicode(nUTF32)); 3320 bEmpty = false; 3321 } 3322 if (bEmpty) 3323 return false; 3324 } 3325 if (pPos != pPathEnd) 3326 return false; 3327 break; 3328 } 3329 3330 case INET_PROT_VND_SUN_STAR_CMD: 3331 case INET_PROT_VND_SUN_STAR_EXPAND: 3332 { 3333 if (pPos == pEnd || *pPos == nFragmentDelimiter) 3334 return false; 3335 Part ePart = PART_URIC_NO_SLASH; 3336 while (pPos != pEnd && *pPos != nFragmentDelimiter) 3337 { 3338 EscapeType eEscapeType; 3339 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, 3340 '%', eMechanism, 3341 eCharset, eEscapeType); 3342 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets, ePart, 3343 '%', eCharset, true); 3344 ePart = PART_URIC; 3345 } 3346 break; 3347 } 3348 3349 case INET_PROT_VND_SUN_STAR_ODMA: 3350 if (pPos < pEnd) 3351 { 3352 if (*pPos == '/') 3353 ++pPos; 3354 else 3355 return false; 3356 } 3357 aTheSynPath.append(sal_Unicode('/')); 3358 while (pPos < pEnd && *pPos != nFragmentDelimiter) 3359 { 3360 EscapeType eEscapeType; 3361 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, 3362 '%', eMechanism, 3363 eCharset, eEscapeType); 3364 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets, 3365 PART_URIC_NO_SLASH, '%', eCharset, true); 3366 } 3367 
break; 3368 3369 case INET_PROT_TELNET: 3370 if (pPos < pEnd) 3371 { 3372 if (*pPos != '/' || pEnd - pPos > 1) 3373 return false; 3374 ++pPos; 3375 } 3376 aTheSynPath.append(sal_Unicode('/')); 3377 break; 3378 3379 case INET_PROT_VND_SUN_STAR_TDOC: 3380 if (pPos == pEnd || *pPos != '/') 3381 return false; 3382 while (pPos < pEnd && *pPos != nFragmentDelimiter) 3383 { 3384 EscapeType eEscapeType; 3385 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, 3386 '%', eMechanism, 3387 eCharset, eEscapeType); 3388 if (eEscapeType == ESCAPE_NO && nUTF32 == '/') 3389 aTheSynPath.append(sal_Unicode('/')); 3390 else 3391 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets, 3392 PART_PCHAR, '%', eCharset, false); 3393 } 3394 break; 3395 3396 case INET_PROT_GENERIC: 3397 while (pPos < pEnd && *pPos != nFragmentDelimiter) 3398 { 3399 EscapeType eEscapeType; 3400 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, 3401 '%', eMechanism, 3402 eCharset, eEscapeType); 3403 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets, 3404 PART_URIC, '%', eCharset, true); 3405 } 3406 if (aTheSynPath.getLength() == 0) 3407 return false; 3408 break; 3409 default: 3410 OSL_ASSERT(false); 3411 break; 3412 } 3413 3414 *pBegin = pPos; 3415 rSynPath = aTheSynPath; 3416 return true; 3417 } 3418 3419 //============================================================================ 3420 bool INetURLObject::setPath(rtl::OUString const & rThePath, bool bOctets, 3421 EncodeMechanism eMechanism, 3422 rtl_TextEncoding eCharset) 3423 { 3424 rtl::OUStringBuffer aSynPath; 3425 sal_Unicode const * p = rThePath.getStr(); 3426 sal_Unicode const * pEnd = p + rThePath.getLength(); 3427 if (!parsePath(m_eScheme, &p, pEnd, bOctets, eMechanism, eCharset, false, 3428 '/', 0x80000000, 0x80000000, 0x80000000, aSynPath) 3429 || p != pEnd) 3430 return false; 3431 sal_Int32 nDelta = m_aPath.set(m_aAbsURIRef, aSynPath.makeStringAndClear()); 3432 m_aQuery += nDelta; 3433 m_aFragment += nDelta; 3434 return true; 3435 } 3436 3437 
//============================================================================ 3438 bool INetURLObject::checkHierarchical() const { 3439 if (m_eScheme == INET_PROT_VND_SUN_STAR_EXPAND) { 3440 OSL_ENSURE( 3441 false, "INetURLObject::checkHierarchical vnd.sun.star.expand"); 3442 return true; 3443 } else { 3444 return getSchemeInfo().m_bHierarchical; 3445 } 3446 } 3447 3448 //============================================================================ 3449 bool INetURLObject::appendSegment(rtl::OUString const & rTheSegment, 3450 bool bOctets, EncodeMechanism eMechanism, 3451 rtl_TextEncoding eCharset) 3452 { 3453 return insertName(rTheSegment, bOctets, false, LAST_SEGMENT, true, 3454 eMechanism, eCharset); 3455 } 3456 3457 //============================================================================ 3458 INetURLObject::SubString INetURLObject::getSegment(sal_Int32 nIndex, 3459 bool bIgnoreFinalSlash) 3460 const 3461 { 3462 DBG_ASSERT(nIndex >= 0 || nIndex == LAST_SEGMENT, 3463 "INetURLObject::getSegment(): Bad index"); 3464 3465 if (!checkHierarchical()) 3466 return SubString(); 3467 3468 sal_Unicode const * pPathBegin 3469 = m_aAbsURIRef.getStr() + m_aPath.getBegin(); 3470 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength(); 3471 sal_Unicode const * pSegBegin; 3472 sal_Unicode const * pSegEnd; 3473 if (nIndex == LAST_SEGMENT) 3474 { 3475 pSegEnd = pPathEnd; 3476 if (bIgnoreFinalSlash && pSegEnd > pPathBegin && pSegEnd[-1] == '/') 3477 --pSegEnd; 3478 if (pSegEnd <= pPathBegin) 3479 return SubString(); 3480 pSegBegin = pSegEnd - 1; 3481 while (pSegBegin > pPathBegin && *pSegBegin != '/') 3482 --pSegBegin; 3483 } 3484 else 3485 { 3486 pSegBegin = pPathBegin; 3487 while (nIndex-- > 0) 3488 do 3489 { 3490 ++pSegBegin; 3491 if (pSegBegin >= pPathEnd) 3492 return SubString(); 3493 } 3494 while (*pSegBegin != '/'); 3495 pSegEnd = pSegBegin + 1; 3496 while (pSegEnd < pPathEnd && *pSegEnd != '/') 3497 ++pSegEnd; 3498 } 3499 3500 return SubString(pSegBegin - 
m_aAbsURIRef.getStr(), 3501 pSegEnd - pSegBegin); 3502 } 3503 3504 //============================================================================ 3505 bool INetURLObject::insertName(rtl::OUString const & rTheName, bool bOctets, 3506 bool bAppendFinalSlash, sal_Int32 nIndex, 3507 bool bIgnoreFinalSlash, 3508 EncodeMechanism eMechanism, 3509 rtl_TextEncoding eCharset) 3510 { 3511 DBG_ASSERT(nIndex >= 0 || nIndex == LAST_SEGMENT, 3512 "INetURLObject::insertName(): Bad index"); 3513 3514 if (!checkHierarchical()) 3515 return false; 3516 3517 sal_Unicode const * pPathBegin 3518 = m_aAbsURIRef.getStr() + m_aPath.getBegin(); 3519 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength(); 3520 sal_Unicode const * pPrefixEnd; 3521 bool bInsertSlash; 3522 sal_Unicode const * pSuffixBegin; 3523 if (nIndex == LAST_SEGMENT) 3524 { 3525 pPrefixEnd = pPathEnd; 3526 if (bIgnoreFinalSlash && pPrefixEnd > pPathBegin && 3527 pPrefixEnd[-1] == '/') 3528 { 3529 --pPrefixEnd; 3530 } 3531 bInsertSlash = bAppendFinalSlash; 3532 pSuffixBegin = pPathEnd; 3533 } 3534 else if (nIndex == 0) 3535 { 3536 pPrefixEnd = pPathBegin; 3537 bInsertSlash = 3538 (pPathBegin < pPathEnd && *pPathBegin != '/') || 3539 (pPathBegin == pPathEnd && bAppendFinalSlash); 3540 pSuffixBegin = 3541 (pPathEnd - pPathBegin == 1 && *pPathBegin == '/' && 3542 !bAppendFinalSlash && bIgnoreFinalSlash) 3543 ? 
pPathEnd : pPathBegin; 3544 } 3545 else 3546 { 3547 pPrefixEnd = pPathBegin; 3548 sal_Unicode const * pEnd = pPathEnd; 3549 if (bIgnoreFinalSlash && pEnd > pPathBegin && pEnd[-1] == '/') 3550 --pEnd; 3551 bool bSkip = pPrefixEnd < pEnd && *pPrefixEnd == '/'; 3552 bInsertSlash = false; 3553 pSuffixBegin = pPathEnd; 3554 while (nIndex-- > 0) 3555 for (;;) 3556 { 3557 if (bSkip) 3558 ++pPrefixEnd; 3559 bSkip = true; 3560 if (pPrefixEnd >= pEnd) 3561 { 3562 if (nIndex == 0) 3563 { 3564 bInsertSlash = bAppendFinalSlash; 3565 break; 3566 } 3567 else 3568 return false; 3569 } 3570 if (*pPrefixEnd == '/') 3571 { 3572 pSuffixBegin = pPrefixEnd; 3573 break; 3574 } 3575 } 3576 } 3577 3578 rtl::OUStringBuffer aNewPath; 3579 aNewPath.append(pPathBegin, pPrefixEnd - pPathBegin); 3580 aNewPath.append(sal_Unicode('/')); 3581 aNewPath.append(encodeText(rTheName, bOctets, PART_PCHAR, getEscapePrefix(), 3582 eMechanism, eCharset, true)); 3583 if (bInsertSlash) { 3584 aNewPath.append(sal_Unicode('/')); 3585 } 3586 aNewPath.append(pSuffixBegin, pPathEnd - pSuffixBegin); 3587 3588 return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC, 3589 RTL_TEXTENCODING_UTF8); 3590 } 3591 3592 //============================================================================ 3593 bool INetURLObject::clearQuery() 3594 { 3595 if (HasError()) 3596 return false; 3597 if (m_aQuery.isPresent()) 3598 { 3599 lcl_Erase(m_aAbsURIRef, m_aQuery.getBegin() - 1, 3600 m_aQuery.getLength() + 1); 3601 m_aFragment += m_aQuery.clear() - 1; 3602 } 3603 return false; 3604 } 3605 3606 //============================================================================ 3607 bool INetURLObject::setQuery(rtl::OUString const & rTheQuery, bool bOctets, 3608 EncodeMechanism eMechanism, 3609 rtl_TextEncoding eCharset) 3610 { 3611 if (!getSchemeInfo().m_bQuery) 3612 return false; 3613 rtl::OUString aNewQuery(encodeText(rTheQuery, bOctets, PART_URIC, 3614 getEscapePrefix(), eMechanism, eCharset, 3615 true)); 3616 sal_Int32 
nDelta; 3617 if (m_aQuery.isPresent()) 3618 nDelta = m_aQuery.set(m_aAbsURIRef, aNewQuery); 3619 else 3620 { 3621 m_aAbsURIRef.insert(m_aPath.getEnd(), sal_Unicode('?')); 3622 nDelta = m_aQuery.set(m_aAbsURIRef, aNewQuery, m_aPath.getEnd() + 1) 3623 + 1; 3624 } 3625 m_aFragment += nDelta; 3626 return true; 3627 } 3628 3629 //============================================================================ 3630 bool INetURLObject::clearFragment() 3631 { 3632 if (HasError()) 3633 return false; 3634 if (m_aFragment.isPresent()) 3635 { 3636 m_aAbsURIRef.setLength(m_aFragment.getBegin() - 1); 3637 m_aFragment.clear(); 3638 } 3639 return true; 3640 } 3641 3642 //============================================================================ 3643 bool INetURLObject::setFragment(rtl::OUString const & rTheFragment, 3644 bool bOctets, EncodeMechanism eMechanism, 3645 rtl_TextEncoding eCharset) 3646 { 3647 if (HasError()) 3648 return false; 3649 rtl::OUString aNewFragment(encodeText(rTheFragment, bOctets, PART_URIC, 3650 getEscapePrefix(), eMechanism, 3651 eCharset, true)); 3652 if (m_aFragment.isPresent()) 3653 m_aFragment.set(m_aAbsURIRef, aNewFragment); 3654 else 3655 { 3656 m_aAbsURIRef.append(sal_Unicode('#')); 3657 m_aFragment.set(m_aAbsURIRef, aNewFragment, m_aAbsURIRef.getLength()); 3658 } 3659 return true; 3660 } 3661 3662 //============================================================================ 3663 INetURLObject::FTPType INetURLObject::getFTPType() const 3664 { 3665 if (m_eScheme == INET_PROT_FTP 3666 && m_aPath.getLength() >= RTL_CONSTASCII_LENGTH(";type=") + 1 3667 && rtl::OUString(m_aAbsURIRef).copy( 3668 m_aPath.getEnd() - (RTL_CONSTASCII_LENGTH(";type=") + 1), 3669 RTL_CONSTASCII_LENGTH(";type=")).equalsIgnoreAsciiCaseAscii(";type=")) 3670 switch (m_aAbsURIRef.charAt(m_aPath.getEnd())) 3671 { 3672 case 'A': 3673 case 'a': 3674 return FTP_TYPE_A; 3675 3676 case 'D': 3677 case 'd': 3678 return FTP_TYPE_D; 3679 3680 case 'I': 3681 case 'i': 3682 return FTP_TYPE_I; 
3683 } 3684 return FTP_TYPE_NONE; 3685 } 3686 3687 //============================================================================ 3688 bool INetURLObject::hasDosVolume(FSysStyle eStyle) const 3689 { 3690 sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin(); 3691 return (eStyle & FSYS_DOS) != 0 3692 && m_aPath.getLength() >= 3 3693 && p[0] == '/' 3694 && INetMIME::isAlpha(p[1]) 3695 && p[2] == ':' 3696 && (m_aPath.getLength() == 3 || p[3] == '/'); 3697 } 3698 3699 //============================================================================ 3700 sal_uInt32 INetURLObject::getIMAPUID() const 3701 { 3702 if (m_eScheme == INET_PROT_IMAP 3703 && m_aPath.getLength() >= RTL_CONSTASCII_LENGTH("/;uid=") + 1) 3704 { 3705 sal_Unicode const * pBegin = m_aAbsURIRef.getStr() 3706 + m_aPath.getBegin() 3707 + RTL_CONSTASCII_LENGTH("/;uid="); 3708 sal_Unicode const * pEnd = pBegin + m_aPath.getLength(); 3709 sal_Unicode const * p = pEnd; 3710 while (p > pBegin && INetMIME::isDigit(p[-1])) 3711 --p; 3712 if (p < pEnd && *--p != '0' 3713 && rtl::OUString(m_aAbsURIRef).copy( 3714 p - RTL_CONSTASCII_LENGTH("/;uid=") - m_aAbsURIRef.getStr(), 3715 RTL_CONSTASCII_LENGTH("/;uid=")).equalsIgnoreAsciiCaseAscii("/;uid=") 3716 ) 3717 { 3718 sal_uInt32 nUID; 3719 if (INetMIME::scanUnsigned(p, pEnd, false, nUID)) 3720 return nUID; 3721 } 3722 } 3723 return 0; 3724 } 3725 3726 //============================================================================ 3727 // static 3728 rtl::OUString INetURLObject::encodeText(sal_Unicode const * pBegin, 3729 sal_Unicode const * pEnd, bool bOctets, 3730 Part ePart, sal_Char cEscapePrefix, 3731 EncodeMechanism eMechanism, 3732 rtl_TextEncoding eCharset, 3733 bool bKeepVisibleEscapes) 3734 { 3735 rtl::OUStringBuffer aResult; 3736 while (pBegin < pEnd) 3737 { 3738 EscapeType eEscapeType; 3739 sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd, bOctets, cEscapePrefix, 3740 eMechanism, eCharset, eEscapeType); 3741 appendUCS4(aResult, nUTF32, eEscapeType, 
bOctets, ePart, 3742 cEscapePrefix, eCharset, bKeepVisibleEscapes); 3743 } 3744 return aResult.makeStringAndClear(); 3745 } 3746 3747 //============================================================================ 3748 // static 3749 rtl::OUString INetURLObject::decode(sal_Unicode const * pBegin, 3750 sal_Unicode const * pEnd, 3751 sal_Char cEscapePrefix, 3752 DecodeMechanism eMechanism, 3753 rtl_TextEncoding eCharset) 3754 { 3755 switch (eMechanism) 3756 { 3757 case NO_DECODE: 3758 return rtl::OUString(pBegin, pEnd - pBegin); 3759 3760 case DECODE_TO_IURI: 3761 eCharset = RTL_TEXTENCODING_UTF8; 3762 break; 3763 3764 default: 3765 break; 3766 } 3767 rtl::OUStringBuffer aResult; 3768 while (pBegin < pEnd) 3769 { 3770 EscapeType eEscapeType; 3771 sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd, false, cEscapePrefix, 3772 WAS_ENCODED, eCharset, eEscapeType); 3773 switch (eEscapeType) 3774 { 3775 case ESCAPE_NO: 3776 aResult.append(sal_Unicode(nUTF32)); 3777 break; 3778 3779 case ESCAPE_OCTET: 3780 appendEscape(aResult, cEscapePrefix, nUTF32); 3781 break; 3782 3783 case ESCAPE_UTF32: 3784 if ( 3785 INetMIME::isUSASCII(nUTF32) && 3786 ( 3787 eMechanism == DECODE_TO_IURI || 3788 ( 3789 eMechanism == DECODE_UNAMBIGUOUS && 3790 mustEncode(nUTF32, PART_UNAMBIGUOUS) 3791 ) 3792 ) 3793 ) 3794 { 3795 appendEscape(aResult, cEscapePrefix, nUTF32); 3796 } 3797 else 3798 aResult.append(sal_Unicode(nUTF32)); 3799 break; 3800 } 3801 } 3802 return aResult.makeStringAndClear(); 3803 } 3804 3805 //============================================================================ 3806 rtl::OUString INetURLObject::GetURLNoPass(DecodeMechanism eMechanism, 3807 rtl_TextEncoding eCharset) const 3808 { 3809 INetURLObject aTemp(*this); 3810 aTemp.clearPassword(); 3811 return aTemp.GetMainURL(eMechanism, eCharset); 3812 } 3813 3814 //============================================================================ 3815 rtl::OUString INetURLObject::GetURLNoMark(DecodeMechanism eMechanism, 3816 rtl_TextEncoding 
eCharset) const 3817 { 3818 INetURLObject aTemp(*this); 3819 aTemp.clearFragment(); 3820 return aTemp.GetMainURL(eMechanism, eCharset); 3821 } 3822 3823 //============================================================================ 3824 rtl::OUString 3825 INetURLObject::getAbbreviated( 3826 star::uno::Reference< star::util::XStringWidth > const & rStringWidth, 3827 sal_Int32 nWidth, 3828 DecodeMechanism eMechanism, 3829 rtl_TextEncoding eCharset) 3830 const 3831 { 3832 OSL_ENSURE(rStringWidth.is(), "specification violation"); 3833 sal_Char cEscapePrefix = getEscapePrefix(); 3834 rtl::OUStringBuffer aBuffer; 3835 // make sure that the scheme is copied for generic schemes: getSchemeInfo().m_pScheme 3836 // is empty ("") in that case, so take the scheme from m_aAbsURIRef 3837 if (m_eScheme != INET_PROT_GENERIC) 3838 { 3839 aBuffer.appendAscii(getSchemeInfo().m_pScheme); 3840 } 3841 else 3842 { 3843 if (m_aAbsURIRef) 3844 { 3845 sal_Unicode const * pSchemeBegin 3846 = m_aAbsURIRef.getStr(); 3847 sal_Unicode const * pSchemeEnd = pSchemeBegin; 3848 3849 while (pSchemeEnd[0] != ':') 3850 { 3851 ++pSchemeEnd; 3852 } 3853 aBuffer.append(pSchemeBegin, pSchemeEnd - pSchemeBegin); 3854 } 3855 } 3856 aBuffer.append(static_cast< sal_Unicode >(':')); 3857 bool bAuthority = getSchemeInfo().m_bAuthority; 3858 sal_Unicode const * pCoreBegin 3859 = m_aAbsURIRef.getStr() + (bAuthority ? 
getAuthorityBegin() : 3860 m_aPath.getBegin()); 3861 sal_Unicode const * pCoreEnd 3862 = m_aAbsURIRef.getStr() + m_aPath.getBegin() + m_aPath.getLength(); 3863 bool bSegment = false; 3864 if (getSchemeInfo().m_bHierarchical) 3865 { 3866 rtl::OUString aRest; 3867 if (m_aQuery.isPresent()) 3868 aRest = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("?...")); 3869 else if (m_aFragment.isPresent()) 3870 aRest = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("#...")); 3871 rtl::OUStringBuffer aTrailer; 3872 sal_Unicode const * pBegin = pCoreBegin; 3873 sal_Unicode const * pEnd = pCoreEnd; 3874 sal_Unicode const * pPrefixBegin = pBegin; 3875 sal_Unicode const * pSuffixEnd = pEnd; 3876 bool bPrefix = true; 3877 bool bSuffix = true; 3878 do 3879 { 3880 if (bSuffix) 3881 { 3882 sal_Unicode const * p = pSuffixEnd - 1; 3883 if (pSuffixEnd == pCoreEnd && *p == '/') 3884 --p; 3885 while (*p != '/') 3886 --p; 3887 if (bAuthority && p == pCoreBegin + 1) 3888 --p; 3889 rtl::OUString 3890 aSegment(decode(p + (p == pBegin && pBegin != pCoreBegin ? 3891 1 : 0), 3892 pSuffixEnd, 3893 cEscapePrefix, 3894 eMechanism, 3895 eCharset)); 3896 pSuffixEnd = p; 3897 rtl::OUStringBuffer aResult(aBuffer); 3898 if (pSuffixEnd != pBegin) 3899 aResult.appendAscii(RTL_CONSTASCII_STRINGPARAM("...")); 3900 aResult.append(aSegment); 3901 aResult.append(aTrailer); 3902 aResult.append(aRest); 3903 if (rStringWidth-> 3904 queryStringWidth(aResult.makeStringAndClear()) 3905 <= nWidth) 3906 { 3907 aTrailer.insert(0, aSegment); 3908 bSegment = true; 3909 pEnd = pSuffixEnd; 3910 } 3911 else 3912 bSuffix = false; 3913 if (pPrefixBegin > pSuffixEnd) 3914 pPrefixBegin = pSuffixEnd; 3915 if (pBegin == pEnd) 3916 break; 3917 } 3918 if (bPrefix) 3919 { 3920 sal_Unicode const * p 3921 = pPrefixBegin 3922 + (bAuthority && pPrefixBegin == pCoreBegin ? 
2 : 3923 1); 3924 OSL_ASSERT(p <= pEnd); 3925 while (p < pEnd && *p != '/') 3926 ++p; 3927 if (p == pCoreEnd - 1 && *p == '/') 3928 ++p; 3929 rtl::OUString 3930 aSegment(decode(pPrefixBegin 3931 + (pPrefixBegin == pCoreBegin ? 0 : 3932 1), 3933 p == pEnd ? p : p + 1, 3934 cEscapePrefix, 3935 eMechanism, 3936 eCharset)); 3937 pPrefixBegin = p; 3938 rtl::OUStringBuffer aResult(aBuffer); 3939 aResult.append(aSegment); 3940 if (pPrefixBegin != pEnd) 3941 aResult.appendAscii(RTL_CONSTASCII_STRINGPARAM("...")); 3942 aResult.append(aTrailer); 3943 aResult.append(aRest); 3944 if (rStringWidth-> 3945 queryStringWidth(aResult.makeStringAndClear()) 3946 <= nWidth) 3947 { 3948 aBuffer.append(aSegment); 3949 bSegment = true; 3950 pBegin = pPrefixBegin; 3951 } 3952 else 3953 bPrefix = false; 3954 if (pPrefixBegin > pSuffixEnd) 3955 pSuffixEnd = pPrefixBegin; 3956 if (pBegin == pEnd) 3957 break; 3958 } 3959 } 3960 while (bPrefix || bSuffix); 3961 if (bSegment) 3962 { 3963 if (pPrefixBegin != pBegin || pSuffixEnd != pEnd) 3964 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("...")); 3965 aBuffer.append(aTrailer); 3966 } 3967 } 3968 if (!bSegment) 3969 aBuffer.append(decode(pCoreBegin, 3970 pCoreEnd, 3971 cEscapePrefix, 3972 eMechanism, 3973 eCharset)); 3974 if (m_aQuery.isPresent()) 3975 { 3976 aBuffer.append(static_cast< sal_Unicode >('?')); 3977 aBuffer.append(decode(m_aQuery, cEscapePrefix, eMechanism, eCharset)); 3978 } 3979 if (m_aFragment.isPresent()) 3980 { 3981 aBuffer.append(static_cast< sal_Unicode >('#')); 3982 aBuffer. 
3983 append(decode(m_aFragment, cEscapePrefix, eMechanism, eCharset)); 3984 } 3985 if (aBuffer.getLength() != 0) 3986 { 3987 rtl::OUStringBuffer aResult(aBuffer); 3988 if (rStringWidth->queryStringWidth(aResult.makeStringAndClear()) 3989 > nWidth) 3990 for (sal_Int32 i = aBuffer.getLength();;) 3991 { 3992 if (i == 0) 3993 { 3994 aBuffer.setLength(aBuffer.getLength() - 1); 3995 if (aBuffer.getLength() == 0) 3996 break; 3997 } 3998 else 3999 { 4000 aBuffer.setLength(--i); 4001 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("...")); 4002 } 4003 aResult = aBuffer; 4004 if (rStringWidth-> 4005 queryStringWidth(aResult.makeStringAndClear()) 4006 <= nWidth) 4007 break; 4008 } 4009 } 4010 return aBuffer.makeStringAndClear(); 4011 } 4012 4013 //============================================================================ 4014 bool INetURLObject::operator ==(INetURLObject const & rObject) const 4015 { 4016 if (m_eScheme != rObject.m_eScheme) 4017 return false; 4018 if (m_eScheme == INET_PROT_NOT_VALID) 4019 return (m_aAbsURIRef == rObject.m_aAbsURIRef) != false; 4020 if ((m_aScheme.compare( 4021 rObject.m_aScheme, m_aAbsURIRef, rObject.m_aAbsURIRef) 4022 != 0) 4023 || GetUser(NO_DECODE) != rObject.GetUser(NO_DECODE) 4024 || GetPass(NO_DECODE) != rObject.GetPass(NO_DECODE) 4025 || !GetHost(NO_DECODE).equalsIgnoreAsciiCase( 4026 rObject.GetHost(NO_DECODE)) 4027 || GetPort() != rObject.GetPort() 4028 || HasParam() != rObject.HasParam() 4029 || GetParam(NO_DECODE) != rObject.GetParam(NO_DECODE) 4030 || GetMsgId(NO_DECODE) != rObject.GetMsgId(NO_DECODE)) 4031 return false; 4032 rtl::OUString aPath1(GetURLPath(NO_DECODE)); 4033 rtl::OUString aPath2(rObject.GetURLPath(NO_DECODE)); 4034 switch (m_eScheme) 4035 { 4036 case INET_PROT_FILE: 4037 { 4038 // If the URL paths of two file URLs only differ in that one has a 4039 // final '/' and the other has not, take the two paths as 4040 // equivalent (this could be usefull for other schemes, too): 4041 sal_Int32 nLength = 
aPath1.getLength(); 4042 switch (nLength - aPath2.getLength()) 4043 { 4044 case -1: 4045 if (aPath2.getStr()[nLength] != '/') 4046 return false; 4047 break; 4048 4049 case 0: 4050 break; 4051 4052 case 1: 4053 if (aPath1.getStr()[--nLength] != '/') 4054 return false; 4055 break; 4056 4057 default: 4058 return false; 4059 } 4060 return aPath1.compareTo(aPath2, nLength) == 0; 4061 } 4062 4063 default: 4064 return (aPath1 == aPath2) != false; 4065 } 4066 } 4067 4068 //============================================================================ 4069 bool INetURLObject::operator <(INetURLObject const & rObject) const 4070 { 4071 sal_Int32 nCompare = m_aScheme.compare( 4072 rObject.m_aScheme, m_aAbsURIRef, rObject.m_aAbsURIRef); 4073 if (nCompare < 0) { 4074 return true; 4075 } else if (nCompare > 0) { 4076 return false; 4077 } 4078 sal_uInt32 nPort1 = GetPort(); 4079 sal_uInt32 nPort2 = rObject.GetPort(); 4080 if (nPort1 < nPort2) 4081 return true; 4082 else if (nPort1 > nPort2) 4083 return false; 4084 nCompare = GetUser(NO_DECODE).compareTo(rObject.GetUser(NO_DECODE)); 4085 if (nCompare < 0) 4086 return true; 4087 else if (nCompare > 0) 4088 return false; 4089 nCompare = GetPass(NO_DECODE).compareTo(rObject.GetPass(NO_DECODE)); 4090 if (nCompare < 0) 4091 return true; 4092 else if (nCompare > 0) 4093 return false; 4094 nCompare = GetHost(NO_DECODE).compareTo(rObject.GetHost(NO_DECODE)); 4095 if (nCompare < 0) 4096 return true; 4097 else if (nCompare > 0) 4098 return false; 4099 const rtl::OUString &rPath1(GetURLPath(NO_DECODE)); 4100 const rtl::OUString &rPath2(rObject.GetURLPath(NO_DECODE)); 4101 nCompare = rPath1.compareTo(rPath2); 4102 if (nCompare < 0) 4103 return true; 4104 else if (nCompare > 0) 4105 return false; 4106 nCompare = GetParam(NO_DECODE).compareTo(rObject.GetParam(NO_DECODE)); 4107 if (nCompare < 0) 4108 return true; 4109 else if (nCompare > 0) 4110 return false; 4111 return GetMsgId(NO_DECODE).compareTo(rObject.GetMsgId(NO_DECODE)) < 0; 4112 } 4113 
4114 //============================================================================ 4115 bool INetURLObject::ConcatData(INetProtocol eTheScheme, 4116 rtl::OUString const & rTheUser, 4117 rtl::OUString const & rThePassword, 4118 rtl::OUString const & rTheHost, 4119 sal_uInt32 nThePort, 4120 rtl::OUString const & rThePath, 4121 EncodeMechanism eMechanism, 4122 rtl_TextEncoding eCharset) 4123 { 4124 setInvalid(); 4125 m_eScheme = eTheScheme; 4126 if (HasError() || m_eScheme == INET_PROT_GENERIC) 4127 return false; 4128 m_aAbsURIRef.setLength(0); 4129 m_aAbsURIRef.appendAscii(getSchemeInfo().m_pScheme); 4130 m_aAbsURIRef.append(sal_Unicode(':')); 4131 if (getSchemeInfo().m_bAuthority) 4132 { 4133 m_aAbsURIRef.appendAscii(RTL_CONSTASCII_STRINGPARAM("//")); 4134 bool bUserInfo = false; 4135 if (getSchemeInfo().m_bUser) 4136 { 4137 if (m_eScheme == INET_PROT_IMAP && rTheUser.getLength() == 0) 4138 { 4139 setInvalid(); 4140 return false; 4141 } 4142 if (rTheUser.getLength() != 0) 4143 { 4144 m_aUser.set(m_aAbsURIRef, 4145 encodeText(rTheUser, false, 4146 m_eScheme == INET_PROT_IMAP ? 4147 PART_IMAP_ACHAR : 4148 m_eScheme == INET_PROT_VIM ? 4149 PART_VIM : 4150 PART_USER_PASSWORD, 4151 getEscapePrefix(), eMechanism, 4152 eCharset, false), 4153 m_aAbsURIRef.getLength()); 4154 bUserInfo = true; 4155 } 4156 } 4157 else if (rTheUser.getLength() != 0) 4158 { 4159 setInvalid(); 4160 return false; 4161 } 4162 if (rThePassword.getLength() != 0) 4163 { 4164 if (getSchemeInfo().m_bPassword) 4165 { 4166 m_aAbsURIRef.append(sal_Unicode(':')); 4167 m_aAuth.set(m_aAbsURIRef, 4168 encodeText(rThePassword, false, 4169 m_eScheme == INET_PROT_VIM ? 
4170 PART_VIM : PART_USER_PASSWORD, 4171 getEscapePrefix(), eMechanism, 4172 eCharset, false), 4173 m_aAbsURIRef.getLength()); 4174 bUserInfo = true; 4175 } 4176 else 4177 { 4178 setInvalid(); 4179 return false; 4180 } 4181 } 4182 if (bUserInfo && getSchemeInfo().m_bHost) 4183 m_aAbsURIRef.append(sal_Unicode('@')); 4184 if (getSchemeInfo().m_bHost) 4185 { 4186 rtl::OUStringBuffer aSynHost(rTheHost); 4187 bool bNetBiosName = false; 4188 switch (m_eScheme) 4189 { 4190 case INET_PROT_FILE: 4191 { 4192 rtl::OUString sTemp(aSynHost); 4193 if (sTemp.equalsIgnoreAsciiCaseAsciiL( 4194 RTL_CONSTASCII_STRINGPARAM("localhost"))) 4195 { 4196 aSynHost.setLength(0); 4197 } 4198 bNetBiosName = true; 4199 } 4200 break; 4201 4202 case INET_PROT_LDAP: 4203 if (aSynHost.getLength() == 0 && nThePort != 0) 4204 { 4205 setInvalid(); 4206 return false; 4207 } 4208 break; 4209 4210 default: 4211 if (aSynHost.getLength() == 0) 4212 { 4213 setInvalid(); 4214 return false; 4215 } 4216 break; 4217 } 4218 if (!parseHostOrNetBiosName( 4219 aSynHost.getStr(), aSynHost.getStr() + aSynHost.getLength(), 4220 false, eMechanism, eCharset, bNetBiosName, &aSynHost)) 4221 { 4222 setInvalid(); 4223 return false; 4224 } 4225 m_aHost.set(m_aAbsURIRef, aSynHost.makeStringAndClear(), 4226 m_aAbsURIRef.getLength()); 4227 if (nThePort != 0) 4228 { 4229 if (getSchemeInfo().m_bPort) 4230 { 4231 m_aAbsURIRef.append(sal_Unicode(':')); 4232 m_aPort.set(m_aAbsURIRef, 4233 rtl::OUString::valueOf(sal_Int64(nThePort)), 4234 m_aAbsURIRef.getLength()); 4235 } 4236 else 4237 { 4238 setInvalid(); 4239 return false; 4240 } 4241 } 4242 } 4243 else if (rTheHost.getLength() != 0 || nThePort != 0) 4244 { 4245 setInvalid(); 4246 return false; 4247 } 4248 } 4249 rtl::OUStringBuffer aSynPath; 4250 sal_Unicode const * p = rThePath.getStr(); 4251 sal_Unicode const * pEnd = p + rThePath.getLength(); 4252 if (!parsePath(m_eScheme, &p, pEnd, false, eMechanism, eCharset, false, '/', 4253 0x80000000, 0x80000000, 0x80000000, aSynPath) 
4254 || p != pEnd) 4255 { 4256 setInvalid(); 4257 return false; 4258 } 4259 m_aPath.set(m_aAbsURIRef, aSynPath.makeStringAndClear(), 4260 m_aAbsURIRef.getLength()); 4261 return true; 4262 } 4263 4264 //============================================================================ 4265 // static 4266 rtl::OUString INetURLObject::GetAbsURL(rtl::OUString const & rTheBaseURIRef, 4267 rtl::OUString const & rTheRelURIRef, 4268 bool bIgnoreFragment, 4269 EncodeMechanism eEncodeMechanism, 4270 DecodeMechanism eDecodeMechanism, 4271 rtl_TextEncoding eCharset, 4272 FSysStyle eStyle) 4273 { 4274 // Backwards compatibility: 4275 if (rTheRelURIRef.getLength() == 0 || rTheRelURIRef[0] == '#') 4276 return rTheRelURIRef; 4277 4278 INetURLObject aTheAbsURIRef; 4279 bool bWasAbsolute; 4280 return INetURLObject(rTheBaseURIRef, eEncodeMechanism, eCharset). 4281 convertRelToAbs(rTheRelURIRef, false, aTheAbsURIRef, 4282 bWasAbsolute, eEncodeMechanism, 4283 eCharset, bIgnoreFragment, false, 4284 false, eStyle) 4285 || eEncodeMechanism != WAS_ENCODED 4286 || eDecodeMechanism != DECODE_TO_IURI 4287 || eCharset != RTL_TEXTENCODING_UTF8 ? 
4288 aTheAbsURIRef.GetMainURL(eDecodeMechanism, eCharset) : 4289 rTheRelURIRef; 4290 } 4291 4292 //============================================================================ 4293 rtl::OUString INetURLObject::getExternalURL(DecodeMechanism eMechanism, 4294 rtl_TextEncoding eCharset) const 4295 { 4296 rtl::OUString aTheExtURIRef; 4297 translateToExternal( 4298 rtl::OUString(m_aAbsURIRef), aTheExtURIRef, eMechanism, eCharset); 4299 return aTheExtURIRef; 4300 } 4301 4302 //============================================================================ 4303 // static 4304 rtl::OUString INetURLObject::GetScheme(INetProtocol eTheScheme) 4305 { 4306 return rtl::OUString::createFromAscii(getSchemeInfo(eTheScheme).m_pPrefix); 4307 } 4308 4309 //============================================================================ 4310 // static 4311 INetProtocol INetURLObject::CompareProtocolScheme(rtl::OUString const & 4312 rTheAbsURIRef) 4313 { 4314 sal_Unicode const * p = rTheAbsURIRef.getStr(); 4315 PrefixInfo const * pPrefix = getPrefix(p, p + rTheAbsURIRef.getLength()); 4316 return pPrefix ? 
pPrefix->m_eScheme : INET_PROT_NOT_VALID; 4317 } 4318 4319 //============================================================================ 4320 bool INetURLObject::hasPassword() const 4321 { 4322 return m_aAuth.isPresent() && getSchemeInfo().m_bPassword; 4323 } 4324 4325 //============================================================================ 4326 void INetURLObject::makeAuthCanonic() 4327 { 4328 if (m_eScheme == INET_PROT_IMAP && m_aAuth.getLength() == 1 4329 && m_aAbsURIRef.charAt(m_aAuth.getBegin()) == '*') 4330 { 4331 lcl_Erase(m_aAbsURIRef, m_aAuth.getBegin() 4332 - RTL_CONSTASCII_LENGTH(";AUTH="), 4333 RTL_CONSTASCII_LENGTH(";AUTH=*")); 4334 sal_Int32 nDelta = m_aAuth.clear() - RTL_CONSTASCII_LENGTH(";AUTH="); 4335 m_aPath += nDelta; 4336 m_aQuery += nDelta; 4337 m_aFragment += nDelta; 4338 } 4339 } 4340 4341 //============================================================================ 4342 rtl::OUString INetURLObject::GetHostPort(DecodeMechanism eMechanism, 4343 rtl_TextEncoding eCharset) 4344 { 4345 // Check because PROT_VND_SUN_STAR_HELP, PROT_VND_SUN_STAR_HIER, and 4346 // PROT_VND_SUN_STAR_PKG misuse m_aHost: 4347 if (!getSchemeInfo().m_bHost) 4348 return rtl::OUString(); 4349 rtl::OUStringBuffer aHostPort(decode(m_aHost, getEscapePrefix(), 4350 eMechanism, eCharset)); 4351 if (m_aPort.isPresent()) 4352 { 4353 aHostPort.append(sal_Unicode(':')); 4354 aHostPort.append(decode(m_aPort, getEscapePrefix(), 4355 eMechanism, eCharset)); 4356 } 4357 return aHostPort.makeStringAndClear(); 4358 } 4359 4360 //============================================================================ 4361 sal_uInt32 INetURLObject::GetPort() const 4362 { 4363 if (m_aPort.isPresent()) 4364 { 4365 sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPort.getBegin(); 4366 sal_Unicode const * pEnd = p + m_aPort.getLength(); 4367 sal_uInt32 nThePort; 4368 if (INetMIME::scanUnsigned(p, pEnd, true, nThePort) && p == pEnd) 4369 return nThePort; 4370 } 4371 return 0; 4372 } 4373 4374 
//============================================================================ 4375 bool INetURLObject::SetPort(sal_uInt32 nThePort) 4376 { 4377 if (getSchemeInfo().m_bPort && m_aHost.isPresent()) 4378 { 4379 rtl::OUString aNewPort(rtl::OUString::valueOf(sal_Int64(nThePort))); 4380 sal_Int32 nDelta; 4381 if (m_aPort.isPresent()) 4382 nDelta = m_aPort.set(m_aAbsURIRef, aNewPort); 4383 else 4384 { 4385 m_aAbsURIRef.insert(m_aHost.getEnd(), sal_Unicode(':')); 4386 nDelta = m_aPort.set(m_aAbsURIRef, aNewPort, m_aHost.getEnd() + 1) 4387 + 1; 4388 } 4389 m_aPath += nDelta; 4390 m_aQuery += nDelta; 4391 m_aFragment += nDelta; 4392 return true; 4393 } 4394 return false; 4395 } 4396 4397 //============================================================================ 4398 void INetURLObject::makePortCanonic() 4399 { 4400 if (m_aPort.isPresent()) 4401 { 4402 sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPort.getBegin(); 4403 sal_Unicode const * pEnd = p + m_aPort.getLength(); 4404 sal_uInt32 nThePort; 4405 if (INetMIME::scanUnsigned(p, pEnd, true, nThePort) && p == pEnd) 4406 { 4407 sal_Int32 nDelta; 4408 if (nThePort != 0 && nThePort == getSchemeInfo().m_nDefaultPort) 4409 { 4410 lcl_Erase(m_aAbsURIRef, m_aPort.getBegin() - 1, 4411 m_aPort.getLength() + 1); 4412 nDelta = m_aPort.clear() - 1; 4413 } 4414 else 4415 nDelta = m_aPort.set(m_aAbsURIRef, 4416 rtl::OUString::valueOf(sal_Int64(nThePort))); 4417 m_aPath += nDelta; 4418 m_aQuery += nDelta; 4419 m_aFragment += nDelta; 4420 } 4421 } 4422 } 4423 4424 //============================================================================ 4425 sal_Int32 INetURLObject::getSegmentCount(bool bIgnoreFinalSlash) const 4426 { 4427 if (!checkHierarchical()) 4428 return 0; 4429 4430 sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin(); 4431 sal_Unicode const * pEnd = p + m_aPath.getLength(); 4432 if (bIgnoreFinalSlash && pEnd > p && pEnd[-1] == '/') 4433 --pEnd; 4434 sal_Int32 n = p == pEnd || *p == '/' ? 
0 : 1; 4435 while (p != pEnd) 4436 if (*p++ == '/') 4437 ++n; 4438 return n; 4439 } 4440 4441 //============================================================================ 4442 bool INetURLObject::removeSegment(sal_Int32 nIndex, bool bIgnoreFinalSlash) 4443 { 4444 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash)); 4445 if (!aSegment.isPresent()) 4446 return false; 4447 4448 rtl::OUStringBuffer aNewPath; 4449 aNewPath.append(m_aAbsURIRef.getStr() + m_aPath.getBegin(), 4450 aSegment.getBegin() - m_aPath.getBegin()); 4451 if (bIgnoreFinalSlash && aSegment.getEnd() == m_aPath.getEnd()) 4452 aNewPath.append(sal_Unicode('/')); 4453 else 4454 aNewPath.append(m_aAbsURIRef.getStr() + aSegment.getEnd(), 4455 m_aPath.getEnd() - aSegment.getEnd()); 4456 if (aNewPath.getLength() == 0 && !aSegment.isEmpty() && 4457 m_aAbsURIRef[aSegment.getBegin()] == '/') 4458 { 4459 aNewPath.append(sal_Unicode('/')); 4460 } 4461 4462 return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC, 4463 RTL_TEXTENCODING_UTF8); 4464 } 4465 4466 //============================================================================ 4467 rtl::OUString INetURLObject::getName(sal_Int32 nIndex, bool bIgnoreFinalSlash, 4468 DecodeMechanism eMechanism, 4469 rtl_TextEncoding eCharset) const 4470 { 4471 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash)); 4472 if (!aSegment.isPresent()) 4473 return rtl::OUString(); 4474 4475 sal_Unicode const * pSegBegin 4476 = m_aAbsURIRef.getStr() + aSegment.getBegin(); 4477 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength(); 4478 4479 if (pSegBegin < pSegEnd && *pSegBegin == '/') 4480 ++pSegBegin; 4481 sal_Unicode const * p = pSegBegin; 4482 while (p != pSegEnd && *p != ';') 4483 ++p; 4484 4485 return decode(pSegBegin, p, getEscapePrefix(), eMechanism, eCharset); 4486 } 4487 4488 //============================================================================ 4489 bool INetURLObject::setName(rtl::OUString const & rTheName, sal_Int32 nIndex, 4490 bool 
bIgnoreFinalSlash, 4491 EncodeMechanism eMechanism, 4492 rtl_TextEncoding eCharset) 4493 { 4494 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash)); 4495 if (!aSegment.isPresent()) 4496 return false; 4497 4498 sal_Unicode const * pPathBegin 4499 = m_aAbsURIRef.getStr() + m_aPath.getBegin(); 4500 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength(); 4501 sal_Unicode const * pSegBegin 4502 = m_aAbsURIRef.getStr() + aSegment.getBegin(); 4503 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength(); 4504 4505 if (pSegBegin < pSegEnd && *pSegBegin == '/') 4506 ++pSegBegin; 4507 sal_Unicode const * p = pSegBegin; 4508 while (p != pSegEnd && *p != ';') 4509 ++p; 4510 4511 rtl::OUStringBuffer aNewPath; 4512 aNewPath.append(pPathBegin, pSegBegin - pPathBegin); 4513 aNewPath.append(encodeText(rTheName, false, PART_PCHAR, getEscapePrefix(), 4514 eMechanism, eCharset, true)); 4515 aNewPath.append(p, pPathEnd - p); 4516 4517 return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC, 4518 RTL_TEXTENCODING_UTF8); 4519 } 4520 4521 //============================================================================ 4522 bool INetURLObject::hasExtension(sal_Int32 nIndex, bool bIgnoreFinalSlash) 4523 const 4524 { 4525 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash)); 4526 if (!aSegment.isPresent()) 4527 return false; 4528 4529 sal_Unicode const * pSegBegin 4530 = m_aAbsURIRef.getStr() + aSegment.getBegin(); 4531 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength(); 4532 4533 if (pSegBegin < pSegEnd && *pSegBegin == '/') 4534 ++pSegBegin; 4535 for (sal_Unicode const * p = pSegBegin; p != pSegEnd && *p != ';'; ++p) 4536 if (*p == '.' 
&& p != pSegBegin) 4537 return true; 4538 return false; 4539 } 4540 4541 //============================================================================ 4542 rtl::OUString INetURLObject::getBase(sal_Int32 nIndex, bool bIgnoreFinalSlash, 4543 DecodeMechanism eMechanism, 4544 rtl_TextEncoding eCharset) const 4545 { 4546 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash)); 4547 if (!aSegment.isPresent()) 4548 return rtl::OUString(); 4549 4550 sal_Unicode const * pSegBegin 4551 = m_aAbsURIRef.getStr() + aSegment.getBegin(); 4552 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength(); 4553 4554 if (pSegBegin < pSegEnd && *pSegBegin == '/') 4555 ++pSegBegin; 4556 sal_Unicode const * pExtension = 0; 4557 sal_Unicode const * p = pSegBegin; 4558 for (; p != pSegEnd && *p != ';'; ++p) 4559 if (*p == '.' && p != pSegBegin) 4560 pExtension = p; 4561 if (!pExtension) 4562 pExtension = p; 4563 4564 return decode(pSegBegin, pExtension, getEscapePrefix(), eMechanism, 4565 eCharset); 4566 } 4567 4568 //============================================================================ 4569 bool INetURLObject::setBase(rtl::OUString const & rTheBase, sal_Int32 nIndex, 4570 bool bIgnoreFinalSlash, 4571 EncodeMechanism eMechanism, 4572 rtl_TextEncoding eCharset) 4573 { 4574 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash)); 4575 if (!aSegment.isPresent()) 4576 return false; 4577 4578 sal_Unicode const * pPathBegin 4579 = m_aAbsURIRef.getStr() + m_aPath.getBegin(); 4580 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength(); 4581 sal_Unicode const * pSegBegin 4582 = m_aAbsURIRef.getStr() + aSegment.getBegin(); 4583 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength(); 4584 4585 if (pSegBegin < pSegEnd && *pSegBegin == '/') 4586 ++pSegBegin; 4587 sal_Unicode const * pExtension = 0; 4588 sal_Unicode const * p = pSegBegin; 4589 for (; p != pSegEnd && *p != ';'; ++p) 4590 if (*p == '.' 
&& p != pSegBegin) 4591 pExtension = p; 4592 if (!pExtension) 4593 pExtension = p; 4594 4595 rtl::OUStringBuffer aNewPath; 4596 aNewPath.append(pPathBegin, pSegBegin - pPathBegin); 4597 aNewPath.append(encodeText(rTheBase, false, PART_PCHAR, getEscapePrefix(), 4598 eMechanism, eCharset, true)); 4599 aNewPath.append(pExtension, pPathEnd - pExtension); 4600 4601 return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC, 4602 RTL_TEXTENCODING_UTF8); 4603 } 4604 4605 //============================================================================ 4606 rtl::OUString INetURLObject::getExtension(sal_Int32 nIndex, 4607 bool bIgnoreFinalSlash, 4608 DecodeMechanism eMechanism, 4609 rtl_TextEncoding eCharset) const 4610 { 4611 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash)); 4612 if (!aSegment.isPresent()) 4613 return rtl::OUString(); 4614 4615 sal_Unicode const * pSegBegin 4616 = m_aAbsURIRef.getStr() + aSegment.getBegin(); 4617 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength(); 4618 4619 if (pSegBegin < pSegEnd && *pSegBegin == '/') 4620 ++pSegBegin; 4621 sal_Unicode const * pExtension = 0; 4622 sal_Unicode const * p = pSegBegin; 4623 for (; p != pSegEnd && *p != ';'; ++p) 4624 if (*p == '.' 
&& p != pSegBegin) 4625 pExtension = p; 4626 4627 if (!pExtension) 4628 return rtl::OUString(); 4629 4630 return decode(pExtension + 1, p, getEscapePrefix(), eMechanism, eCharset); 4631 } 4632 4633 //============================================================================ 4634 bool INetURLObject::setExtension(rtl::OUString const & rTheExtension, 4635 sal_Int32 nIndex, bool bIgnoreFinalSlash, 4636 EncodeMechanism eMechanism, 4637 rtl_TextEncoding eCharset) 4638 { 4639 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash)); 4640 if (!aSegment.isPresent()) 4641 return false; 4642 4643 sal_Unicode const * pPathBegin 4644 = m_aAbsURIRef.getStr() + m_aPath.getBegin(); 4645 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength(); 4646 sal_Unicode const * pSegBegin 4647 = m_aAbsURIRef.getStr() + aSegment.getBegin(); 4648 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength(); 4649 4650 if (pSegBegin < pSegEnd && *pSegBegin == '/') 4651 ++pSegBegin; 4652 sal_Unicode const * pExtension = 0; 4653 sal_Unicode const * p = pSegBegin; 4654 for (; p != pSegEnd && *p != ';'; ++p) 4655 if (*p == '.' 
&& p != pSegBegin) 4656 pExtension = p; 4657 if (!pExtension) 4658 pExtension = p; 4659 4660 rtl::OUStringBuffer aNewPath; 4661 aNewPath.append(pPathBegin, pExtension - pPathBegin); 4662 aNewPath.append(sal_Unicode('.')); 4663 aNewPath.append(encodeText(rTheExtension, false, PART_PCHAR, 4664 getEscapePrefix(), eMechanism, eCharset, true)); 4665 aNewPath.append(p, pPathEnd - p); 4666 4667 return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC, 4668 RTL_TEXTENCODING_UTF8); 4669 } 4670 4671 //============================================================================ 4672 bool INetURLObject::removeExtension(sal_Int32 nIndex, bool bIgnoreFinalSlash) 4673 { 4674 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash)); 4675 if (!aSegment.isPresent()) 4676 return false; 4677 4678 sal_Unicode const * pPathBegin 4679 = m_aAbsURIRef.getStr() + m_aPath.getBegin(); 4680 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength(); 4681 sal_Unicode const * pSegBegin 4682 = m_aAbsURIRef.getStr() + aSegment.getBegin(); 4683 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength(); 4684 4685 if (pSegBegin < pSegEnd && *pSegBegin == '/') 4686 ++pSegBegin; 4687 sal_Unicode const * pExtension = 0; 4688 sal_Unicode const * p = pSegBegin; 4689 for (; p != pSegEnd && *p != ';'; ++p) 4690 if (*p == '.' 
&& p != pSegBegin) 4691 pExtension = p; 4692 if (!pExtension) 4693 return true; 4694 4695 rtl::OUStringBuffer aNewPath; 4696 aNewPath.append(pPathBegin, pExtension - pPathBegin); 4697 aNewPath.append(p, pPathEnd - p); 4698 4699 return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC, 4700 RTL_TEXTENCODING_UTF8); 4701 } 4702 4703 //============================================================================ 4704 bool INetURLObject::hasFinalSlash() const 4705 { 4706 if (!checkHierarchical()) 4707 return false; 4708 4709 sal_Unicode const * pPathBegin 4710 = m_aAbsURIRef.getStr() + m_aPath.getBegin(); 4711 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength(); 4712 return pPathEnd > pPathBegin && pPathEnd[-1] == '/'; 4713 } 4714 4715 //============================================================================ 4716 bool INetURLObject::setFinalSlash() 4717 { 4718 if (!checkHierarchical()) 4719 return false; 4720 4721 sal_Unicode const * pPathBegin 4722 = m_aAbsURIRef.getStr() + m_aPath.getBegin(); 4723 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength(); 4724 if (pPathEnd > pPathBegin && pPathEnd[-1] == '/') 4725 return true; 4726 4727 rtl::OUStringBuffer aNewPath; 4728 aNewPath.append(pPathBegin, pPathEnd - pPathBegin); 4729 aNewPath.append(sal_Unicode('/')); 4730 4731 return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC, 4732 RTL_TEXTENCODING_UTF8); 4733 } 4734 4735 //============================================================================ 4736 bool INetURLObject::removeFinalSlash() 4737 { 4738 if (!checkHierarchical()) 4739 return false; 4740 4741 sal_Unicode const * pPathBegin 4742 = m_aAbsURIRef.getStr() + m_aPath.getBegin(); 4743 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength(); 4744 if (pPathEnd <= pPathBegin || pPathEnd[-1] != '/') 4745 return true; 4746 4747 --pPathEnd; 4748 if (pPathEnd == pPathBegin && *pPathBegin == '/') 4749 return false; 4750 rtl::OUString aNewPath(pPathBegin, pPathEnd - 
pPathBegin); 4751 4752 return setPath(aNewPath, false, NOT_CANONIC, RTL_TEXTENCODING_UTF8); 4753 } 4754 4755 //============================================================================ 4756 // static 4757 rtl::OUString INetURLObject::createFragment(rtl::OUString const & rText) 4758 { 4759 rtl::OUString aFragment(rText); 4760 for (sal_Int32 i = 0; i < aFragment.getLength();) 4761 { 4762 sal_Unicode c = aFragment.getStr()[i]; 4763 if (mustEncode(c, PART_CREATEFRAGMENT)) 4764 aFragment = aFragment.replaceAt(i, 1, rtl::OUString()); 4765 else 4766 ++i; 4767 } 4768 return aFragment; 4769 } 4770 4771 //============================================================================ 4772 bool INetURLObject::setFSysPath(rtl::OUString const & rFSysPath, 4773 FSysStyle eStyle) 4774 { 4775 sal_Unicode const * pFSysBegin = rFSysPath.getStr(); 4776 sal_Unicode const * pFSysEnd = pFSysBegin + rFSysPath.getLength(); 4777 4778 switch ((eStyle & FSYS_VOS ? 1 : 0) 4779 + (eStyle & FSYS_UNX ? 1 : 0) 4780 + (eStyle & FSYS_DOS ? 1 : 0) 4781 + (eStyle & FSYS_MAC ? 1 : 0)) 4782 { 4783 case 0: 4784 return false; 4785 4786 case 1: 4787 break; 4788 4789 default: 4790 if (eStyle & FSYS_VOS 4791 && pFSysEnd - pFSysBegin >= 2 4792 && pFSysBegin[0] == '/' 4793 && pFSysBegin[1] == '/') 4794 { 4795 if (pFSysEnd - pFSysBegin >= 3 4796 && pFSysBegin[2] == '.' 
4797 && (pFSysEnd - pFSysBegin == 3 || pFSysBegin[3] == '/')) 4798 { 4799 eStyle = FSYS_VOS; // Production T1 4800 break; 4801 } 4802 4803 sal_Unicode const * p = pFSysBegin + 2; 4804 rtl::OUString aHost; 4805 if (parseHost(p, pFSysEnd, aHost) 4806 && (p == pFSysEnd || *p == '/')) 4807 { 4808 eStyle = FSYS_VOS; // Production T2 4809 break; 4810 } 4811 } 4812 4813 if (eStyle & FSYS_DOS 4814 && pFSysEnd - pFSysBegin >= 2 4815 && pFSysBegin[0] == '\\' 4816 && pFSysBegin[1] == '\\') 4817 { 4818 sal_Unicode const * p = pFSysBegin + 2; 4819 rtl::OUString aHost; 4820 if (parseHost(p, pFSysEnd, aHost) 4821 && (p == pFSysEnd || *p == '\\')) 4822 { 4823 eStyle = FSYS_DOS; // Production T3 4824 break; 4825 } 4826 } 4827 4828 if (eStyle & FSYS_DOS 4829 && pFSysEnd - pFSysBegin >= 2 4830 && INetMIME::isAlpha(pFSysBegin[0]) 4831 && pFSysBegin[1] == ':' 4832 && (pFSysEnd - pFSysBegin == 2 4833 || pFSysBegin[2] == '/' 4834 || pFSysBegin[2] == '\\')) 4835 { 4836 eStyle = FSYS_DOS; // Productions T4, T5 4837 break; 4838 } 4839 4840 if (!(eStyle & (FSYS_UNX | FSYS_DOS | FSYS_MAC))) 4841 return false; 4842 4843 eStyle = guessFSysStyleByCounting(pFSysBegin, pFSysEnd, eStyle); 4844 // Production T6 4845 break; 4846 } 4847 4848 rtl::OUStringBuffer aSynAbsURIRef(rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("file://"))); 4849 4850 switch (eStyle) 4851 { 4852 case FSYS_VOS: 4853 { 4854 sal_Unicode const * p = pFSysBegin; 4855 if (pFSysEnd - p < 2 || *p++ != '/' || *p++ != '/') 4856 return false; 4857 if (p != pFSysEnd && *p == '.' 
4858 && (pFSysEnd - p == 1 || p[1] == '/')) 4859 ++p; 4860 for (; p != pFSysEnd; ++p) 4861 switch (*p) 4862 { 4863 case '#': 4864 case '%': 4865 appendEscape(aSynAbsURIRef, '%', *p); 4866 break; 4867 4868 default: 4869 aSynAbsURIRef.append(*p); 4870 break; 4871 } 4872 break; 4873 } 4874 4875 case FSYS_UNX: 4876 { 4877 sal_Unicode const * p = pFSysBegin; 4878 if (p != pFSysEnd && *p != '/') 4879 return false; 4880 for (; p != pFSysEnd; ++p) 4881 switch (*p) 4882 { 4883 case '|': 4884 case '#': 4885 case '%': 4886 appendEscape(aSynAbsURIRef, '%', *p); 4887 break; 4888 4889 default: 4890 aSynAbsURIRef.append(*p); 4891 break; 4892 } 4893 break; 4894 } 4895 4896 case FSYS_DOS: 4897 { 4898 sal_uInt32 nAltDelimiter = 0x80000000; 4899 sal_Unicode const * p = pFSysBegin; 4900 if (pFSysEnd - p >= 3 && p[0] == '\\' && p[1] == '\\') 4901 p += 2; 4902 else 4903 { 4904 aSynAbsURIRef.append(sal_Unicode('/')); 4905 if (pFSysEnd - p >= 2 4906 && INetMIME::isAlpha(p[0]) 4907 && p[1] == ':' 4908 && (pFSysEnd - p == 2 || p[2] == '\\' || p[2] == '/')) 4909 nAltDelimiter = '/'; 4910 } 4911 for (; p != pFSysEnd; ++p) 4912 if (*p == '\\' || *p == nAltDelimiter) 4913 aSynAbsURIRef.append(sal_Unicode('/')); 4914 else 4915 switch (*p) 4916 { 4917 case '/': 4918 case '#': 4919 case '%': 4920 appendEscape(aSynAbsURIRef, '%', *p); 4921 break; 4922 4923 default: 4924 aSynAbsURIRef.append(*p); 4925 break; 4926 } 4927 break; 4928 } 4929 4930 case FSYS_MAC: 4931 aSynAbsURIRef.append(sal_Unicode('/')); 4932 {for (sal_Unicode const * p = pFSysBegin; p != pFSysEnd; ++p) 4933 switch (*p) 4934 { 4935 case ':': 4936 aSynAbsURIRef.append(sal_Unicode('/')); 4937 break; 4938 4939 case '/': 4940 case '|': 4941 case '#': 4942 case '%': 4943 appendEscape(aSynAbsURIRef, '%', *p); 4944 break; 4945 4946 default: 4947 aSynAbsURIRef.append(*p); 4948 break; 4949 } 4950 } 4951 break; 4952 4953 default: 4954 OSL_ASSERT(false); 4955 break; 4956 } 4957 4958 INetURLObject aTemp(aSynAbsURIRef.makeStringAndClear(), 
WAS_ENCODED, 4959 RTL_TEXTENCODING_UTF8); 4960 if (aTemp.HasError()) 4961 return false; 4962 4963 *this = aTemp; 4964 return true; 4965 } 4966 4967 //============================================================================ 4968 rtl::OUString INetURLObject::getFSysPath(FSysStyle eStyle, 4969 sal_Unicode * pDelimiter) const 4970 { 4971 if (m_eScheme != INET_PROT_FILE) 4972 return rtl::OUString(); 4973 4974 if ((eStyle & FSYS_VOS ? 1 : 0) 4975 + (eStyle & FSYS_UNX ? 1 : 0) 4976 + (eStyle & FSYS_DOS ? 1 : 0) 4977 + (eStyle & FSYS_MAC ? 1 : 0) 4978 > 1) 4979 { 4980 eStyle = eStyle & FSYS_VOS 4981 && m_aHost.isPresent() 4982 && m_aHost.getLength() > 0 ? 4983 FSYS_VOS : 4984 hasDosVolume(eStyle) 4985 || ((eStyle & FSYS_DOS) != 0 4986 && m_aHost.isPresent() 4987 && m_aHost.getLength() > 0) ? 4988 FSYS_DOS : 4989 eStyle & FSYS_UNX 4990 && (!m_aHost.isPresent() || m_aHost.getLength() == 0) ? 4991 FSYS_UNX : 4992 FSysStyle(0); 4993 } 4994 4995 switch (eStyle) 4996 { 4997 case FSYS_VOS: 4998 { 4999 if (pDelimiter) 5000 *pDelimiter = '/'; 5001 5002 rtl::OUStringBuffer aSynFSysPath; 5003 aSynFSysPath.appendAscii(RTL_CONSTASCII_STRINGPARAM("//")); 5004 if (m_aHost.isPresent() && m_aHost.getLength() > 0) 5005 aSynFSysPath.append(decode(m_aHost, '%', DECODE_WITH_CHARSET, 5006 RTL_TEXTENCODING_UTF8)); 5007 else 5008 aSynFSysPath.append(sal_Unicode('.')); 5009 aSynFSysPath.append(decode(m_aPath, '%', DECODE_WITH_CHARSET, 5010 RTL_TEXTENCODING_UTF8)); 5011 return aSynFSysPath.makeStringAndClear(); 5012 } 5013 5014 case FSYS_UNX: 5015 { 5016 if (m_aHost.isPresent() && m_aHost.getLength() > 0) 5017 return rtl::OUString(); 5018 5019 if (pDelimiter) 5020 *pDelimiter = '/'; 5021 5022 return decode(m_aPath, '%', DECODE_WITH_CHARSET, 5023 RTL_TEXTENCODING_UTF8); 5024 } 5025 5026 case FSYS_DOS: 5027 { 5028 if (pDelimiter) 5029 *pDelimiter = '\\'; 5030 5031 rtl::OUStringBuffer aSynFSysPath; 5032 if (m_aHost.isPresent() && m_aHost.getLength() > 0) 5033 { 5034 
aSynFSysPath.appendAscii(RTL_CONSTASCII_STRINGPARAM("\\\\")); 5035 aSynFSysPath.append(decode(m_aHost, '%', DECODE_WITH_CHARSET, 5036 RTL_TEXTENCODING_UTF8)); 5037 aSynFSysPath.append(sal_Unicode('\\')); 5038 } 5039 sal_Unicode const * p 5040 = m_aAbsURIRef.getStr() + m_aPath.getBegin(); 5041 sal_Unicode const * pEnd = p + m_aPath.getLength(); 5042 DBG_ASSERT(p < pEnd && *p == '/', 5043 "INetURLObject::getFSysPath(): Bad path"); 5044 ++p; 5045 while (p < pEnd) 5046 { 5047 EscapeType eEscapeType; 5048 sal_uInt32 nUTF32 = getUTF32(p, pEnd, false, '%', WAS_ENCODED, 5049 RTL_TEXTENCODING_UTF8, 5050 eEscapeType); 5051 if (eEscapeType == ESCAPE_NO && nUTF32 == '/') 5052 aSynFSysPath.append(sal_Unicode('\\')); 5053 else 5054 aSynFSysPath.appendUtf32(nUTF32); 5055 } 5056 return aSynFSysPath.makeStringAndClear(); 5057 } 5058 5059 case FSYS_MAC: 5060 { 5061 if (m_aHost.isPresent() && m_aHost.getLength() > 0) 5062 return rtl::OUString(); 5063 5064 if (pDelimiter) 5065 *pDelimiter = ':'; 5066 5067 rtl::OUStringBuffer aSynFSysPath; 5068 sal_Unicode const * p 5069 = m_aAbsURIRef.getStr() + m_aPath.getBegin(); 5070 sal_Unicode const * pEnd = p + m_aPath.getLength(); 5071 DBG_ASSERT(p < pEnd && *p == '/', 5072 "INetURLObject::getFSysPath(): Bad path"); 5073 ++p; 5074 while (p < pEnd) 5075 { 5076 EscapeType eEscapeType; 5077 sal_uInt32 nUTF32 = getUTF32(p, pEnd, false, '%', WAS_ENCODED, 5078 RTL_TEXTENCODING_UTF8, 5079 eEscapeType); 5080 if (eEscapeType == ESCAPE_NO && nUTF32 == '/') 5081 aSynFSysPath.append(sal_Unicode(':')); 5082 else 5083 aSynFSysPath.appendUtf32(nUTF32); 5084 } 5085 return aSynFSysPath.makeStringAndClear(); 5086 } 5087 5088 default: 5089 return rtl::OUString(); 5090 } 5091 } 5092 5093 //============================================================================ 5094 bool INetURLObject::HasMsgId() const 5095 { 5096 if (m_eScheme != INET_PROT_POP3) 5097 return false; 5098 sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin(); 5099 sal_Unicode const 
* pEnd = p + m_aPath.getLength(); 5100 for (; p < pEnd; ++p) 5101 if (*p == '<') 5102 return true; 5103 return false; 5104 } 5105 5106 //============================================================================ 5107 rtl::OUString INetURLObject::GetMsgId(DecodeMechanism eMechanism, 5108 rtl_TextEncoding eCharset) const 5109 { 5110 if (m_eScheme != INET_PROT_POP3) 5111 return rtl::OUString(); 5112 sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin(); 5113 sal_Unicode const * pEnd = p + m_aPath.getLength(); 5114 for (; p < pEnd; ++p) 5115 if (*p == '<') 5116 return decode(p, pEnd, getEscapePrefix(), eMechanism, eCharset); 5117 return rtl::OUString(); 5118 } 5119 5120 //============================================================================ 5121 // static 5122 void INetURLObject::appendUCS4Escape(rtl::OUStringBuffer & rTheText, 5123 sal_Char cEscapePrefix, sal_uInt32 nUCS4) 5124 { 5125 DBG_ASSERT(nUCS4 < 0x80000000, 5126 "INetURLObject::appendUCS4Escape(): Bad char"); 5127 if (nUCS4 < 0x80) 5128 appendEscape(rTheText, cEscapePrefix, nUCS4); 5129 else if (nUCS4 < 0x800) 5130 { 5131 appendEscape(rTheText, cEscapePrefix, nUCS4 >> 6 | 0xC0); 5132 appendEscape(rTheText, cEscapePrefix, (nUCS4 & 0x3F) | 0x80); 5133 } 5134 else if (nUCS4 < 0x10000) 5135 { 5136 appendEscape(rTheText, cEscapePrefix, nUCS4 >> 12 | 0xE0); 5137 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 6 & 0x3F) | 0x80); 5138 appendEscape(rTheText, cEscapePrefix, (nUCS4 & 0x3F) | 0x80); 5139 } 5140 else if (nUCS4 < 0x200000) 5141 { 5142 appendEscape(rTheText, cEscapePrefix, nUCS4 >> 18 | 0xF0); 5143 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 12 & 0x3F) | 0x80); 5144 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 6 & 0x3F) | 0x80); 5145 appendEscape(rTheText, cEscapePrefix, (nUCS4 & 0x3F) | 0x80); 5146 } 5147 else if (nUCS4 < 0x4000000) 5148 { 5149 appendEscape(rTheText, cEscapePrefix, nUCS4 >> 24 | 0xF8); 5150 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 18 & 0x3F) | 0x80); 5151 
appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 12 & 0x3F) | 0x80); 5152 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 6 & 0x3F) | 0x80); 5153 appendEscape(rTheText, cEscapePrefix, (nUCS4 & 0x3F) | 0x80); 5154 } 5155 else 5156 { 5157 appendEscape(rTheText, cEscapePrefix, nUCS4 >> 30 | 0xFC); 5158 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 24 & 0x3F) | 0x80); 5159 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 18 & 0x3F) | 0x80); 5160 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 12 & 0x3F) | 0x80); 5161 appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 6 & 0x3F) | 0x80); 5162 appendEscape(rTheText, cEscapePrefix, (nUCS4 & 0x3F) | 0x80); 5163 } 5164 } 5165 5166 //============================================================================ 5167 // static 5168 void INetURLObject::appendUCS4(rtl::OUStringBuffer& rTheText, sal_uInt32 nUCS4, 5169 EscapeType eEscapeType, bool bOctets, 5170 Part ePart, sal_Char cEscapePrefix, 5171 rtl_TextEncoding eCharset, 5172 bool bKeepVisibleEscapes) 5173 { 5174 bool bEscape; 5175 rtl_TextEncoding eTargetCharset = RTL_TEXTENCODING_DONTKNOW; 5176 switch (eEscapeType) 5177 { 5178 case ESCAPE_NO: 5179 if (mustEncode(nUCS4, ePart)) 5180 { 5181 bEscape = true; 5182 eTargetCharset = bOctets ? 
RTL_TEXTENCODING_ISO_8859_1 : 5183 RTL_TEXTENCODING_UTF8; 5184 } 5185 else 5186 bEscape = false; 5187 break; 5188 5189 case ESCAPE_OCTET: 5190 bEscape = true; 5191 eTargetCharset = RTL_TEXTENCODING_ISO_8859_1; 5192 break; 5193 5194 case ESCAPE_UTF32: 5195 if (mustEncode(nUCS4, ePart)) 5196 { 5197 bEscape = true; 5198 eTargetCharset = eCharset; 5199 } 5200 else if (bKeepVisibleEscapes && INetMIME::isVisible(nUCS4)) 5201 { 5202 bEscape = true; 5203 eTargetCharset = RTL_TEXTENCODING_ASCII_US; 5204 } 5205 else 5206 bEscape = false; 5207 break; 5208 default: 5209 bEscape = false; 5210 } 5211 5212 if (bEscape) 5213 { 5214 switch (eTargetCharset) 5215 { 5216 default: 5217 DBG_ERROR("INetURLObject::appendUCS4(): Unsupported charset"); 5218 case RTL_TEXTENCODING_ASCII_US: 5219 case RTL_TEXTENCODING_ISO_8859_1: 5220 appendEscape(rTheText, cEscapePrefix, nUCS4); 5221 break; 5222 5223 case RTL_TEXTENCODING_UTF8: 5224 appendUCS4Escape(rTheText, cEscapePrefix, nUCS4); 5225 break; 5226 } 5227 } 5228 else 5229 rTheText.append(sal_Unicode(nUCS4)); 5230 } 5231 5232 //============================================================================ 5233 // static 5234 sal_uInt32 INetURLObject::getUTF32(sal_Unicode const *& rBegin, 5235 sal_Unicode const * pEnd, bool bOctets, 5236 sal_Char cEscapePrefix, 5237 EncodeMechanism eMechanism, 5238 rtl_TextEncoding eCharset, 5239 EscapeType & rEscapeType) 5240 { 5241 DBG_ASSERT(rBegin < pEnd, "INetURLObject::getUTF32(): Bad sequence"); 5242 sal_uInt32 nUTF32 = bOctets ? 
*rBegin++ : 5243 INetMIME::getUTF32Character(rBegin, pEnd); 5244 switch (eMechanism) 5245 { 5246 case ENCODE_ALL: 5247 rEscapeType = ESCAPE_NO; 5248 break; 5249 5250 case WAS_ENCODED: 5251 { 5252 int nWeight1; 5253 int nWeight2; 5254 if (nUTF32 == sal_uChar(cEscapePrefix) && rBegin + 1 < pEnd 5255 && (nWeight1 = INetMIME::getHexWeight(rBegin[0])) >= 0 5256 && (nWeight2 = INetMIME::getHexWeight(rBegin[1])) >= 0) 5257 { 5258 rBegin += 2; 5259 nUTF32 = nWeight1 << 4 | nWeight2; 5260 switch (eCharset) 5261 { 5262 default: 5263 DBG_ERROR( 5264 "INetURLObject::getUTF32(): Unsupported charset"); 5265 case RTL_TEXTENCODING_ASCII_US: 5266 rEscapeType = INetMIME::isUSASCII(nUTF32) ? 5267 ESCAPE_UTF32 : ESCAPE_OCTET; 5268 break; 5269 5270 case RTL_TEXTENCODING_ISO_8859_1: 5271 rEscapeType = ESCAPE_UTF32; 5272 break; 5273 5274 case RTL_TEXTENCODING_UTF8: 5275 if (INetMIME::isUSASCII(nUTF32)) 5276 rEscapeType = ESCAPE_UTF32; 5277 else 5278 { 5279 if (nUTF32 >= 0xC0 && nUTF32 <= 0xF4) 5280 { 5281 sal_uInt32 nEncoded; 5282 int nShift; 5283 sal_uInt32 nMin; 5284 if (nUTF32 <= 0xDF) 5285 { 5286 nEncoded = (nUTF32 & 0x1F) << 6; 5287 nShift = 0; 5288 nMin = 0x80; 5289 } 5290 else if (nUTF32 <= 0xEF) 5291 { 5292 nEncoded = (nUTF32 & 0x0F) << 12; 5293 nShift = 6; 5294 nMin = 0x800; 5295 } 5296 else 5297 { 5298 nEncoded = (nUTF32 & 0x07) << 18; 5299 nShift = 12; 5300 nMin = 0x10000; 5301 } 5302 sal_Unicode const * p = rBegin; 5303 bool bUTF8 = true; 5304 for (;;) 5305 { 5306 if (pEnd - p < 3 5307 || p[0] != cEscapePrefix 5308 || (nWeight1 5309 = INetMIME::getHexWeight(p[1])) 5310 < 8 5311 || nWeight1 > 11 5312 || (nWeight2 5313 = INetMIME::getHexWeight(p[2])) 5314 < 0) 5315 { 5316 bUTF8 = false; 5317 break; 5318 } 5319 p += 3; 5320 nEncoded 5321 |= ((nWeight1 & 3) << 4 | nWeight2) 5322 << nShift; 5323 if (nShift == 0) 5324 break; 5325 nShift -= 6; 5326 } 5327 if (bUTF8 && nEncoded >= nMin 5328 && !INetMIME::isHighSurrogate(nEncoded) 5329 && !INetMIME::isLowSurrogate(nEncoded) 5330 && 
nEncoded <= 0x10FFFF) 5331 { 5332 rBegin = p; 5333 nUTF32 = nEncoded; 5334 rEscapeType = ESCAPE_UTF32; 5335 break; 5336 } 5337 } 5338 rEscapeType = ESCAPE_OCTET; 5339 } 5340 break; 5341 } 5342 } 5343 else 5344 rEscapeType = ESCAPE_NO; 5345 break; 5346 } 5347 5348 case NOT_CANONIC: 5349 { 5350 int nWeight1; 5351 int nWeight2; 5352 if (nUTF32 == sal_uChar(cEscapePrefix) && rBegin + 1 < pEnd 5353 && ((nWeight1 = INetMIME::getHexWeight(rBegin[0])) >= 0) 5354 && ((nWeight2 = INetMIME::getHexWeight(rBegin[1])) >= 0)) 5355 { 5356 rBegin += 2; 5357 nUTF32 = nWeight1 << 4 | nWeight2; 5358 rEscapeType = ESCAPE_OCTET; 5359 } 5360 else 5361 rEscapeType = ESCAPE_NO; 5362 break; 5363 } 5364 } 5365 return nUTF32; 5366 } 5367 5368 //============================================================================ 5369 // static 5370 sal_uInt32 INetURLObject::scanDomain(sal_Unicode const *& rBegin, 5371 sal_Unicode const * pEnd, 5372 bool bEager) 5373 { 5374 enum State { STATE_DOT, STATE_LABEL, STATE_HYPHEN }; 5375 State eState = STATE_DOT; 5376 sal_Int32 nLabels = 0; 5377 sal_Unicode const * pLastAlphanumeric = 0; 5378 for (sal_Unicode const * p = rBegin;; ++p) 5379 switch (eState) 5380 { 5381 case STATE_DOT: 5382 if (p != pEnd && INetMIME::isAlphanumeric(*p)) 5383 { 5384 ++nLabels; 5385 eState = STATE_LABEL; 5386 break; 5387 } 5388 if (bEager || nLabels == 0) 5389 return 0; 5390 rBegin = p - 1; 5391 return nLabels; 5392 5393 case STATE_LABEL: 5394 if (p != pEnd) 5395 { 5396 if (INetMIME::isAlphanumeric(*p)) 5397 break; 5398 else if (*p == '.') 5399 { 5400 eState = STATE_DOT; 5401 break; 5402 } 5403 else if (*p == '-') 5404 { 5405 pLastAlphanumeric = p; 5406 eState = STATE_HYPHEN; 5407 break; 5408 } 5409 } 5410 rBegin = p; 5411 return nLabels; 5412 5413 case STATE_HYPHEN: 5414 if (p != pEnd) 5415 { 5416 if (INetMIME::isAlphanumeric(*p)) 5417 { 5418 eState = STATE_LABEL; 5419 break; 5420 } 5421 else if (*p == '-') 5422 break; 5423 } 5424 if (bEager) 5425 return 0; 5426 rBegin = 
pLastAlphanumeric; 5427 return nLabels; 5428 } 5429 } 5430 5431 //============================================================================ 5432 // static 5433 bool INetURLObject::scanIPv6reference(sal_Unicode const *& rBegin, 5434 sal_Unicode const * pEnd) 5435 { 5436 if (rBegin != pEnd && *rBegin == '[') { 5437 sal_Unicode const * p = rBegin + 1; 5438 //TODO: check for valid IPv6address (RFC 2373): 5439 while (p != pEnd && (INetMIME::isHexDigit(*p) || *p == ':' || *p == '.')) 5440 { 5441 ++p; 5442 } 5443 if (p != pEnd && *p == ']') { 5444 rBegin = p + 1; 5445 return true; 5446 } 5447 } 5448 return false; 5449 } 5450 5451 //============================================================================ 5452 rtl::OUString INetURLObject::GetPartBeforeLastName(DecodeMechanism eMechanism, 5453 rtl_TextEncoding eCharset) 5454 const 5455 { 5456 if (!checkHierarchical()) 5457 return rtl::OUString(); 5458 INetURLObject aTemp(*this); 5459 aTemp.clearFragment(); 5460 aTemp.clearQuery(); 5461 aTemp.removeSegment(LAST_SEGMENT, false); 5462 aTemp.setFinalSlash(); 5463 return aTemp.GetMainURL(eMechanism, eCharset); 5464 } 5465 5466 //============================================================================ 5467 rtl::OUString INetURLObject::GetLastName(DecodeMechanism eMechanism, 5468 rtl_TextEncoding eCharset) const 5469 { 5470 return getName(LAST_SEGMENT, true, eMechanism, eCharset); 5471 } 5472 5473 //============================================================================ 5474 rtl::OUString INetURLObject::GetFileExtension(DecodeMechanism eMechanism, 5475 rtl_TextEncoding eCharset) const 5476 { 5477 return getExtension(LAST_SEGMENT, false, eMechanism, eCharset); 5478 } 5479 5480 //============================================================================ 5481 bool INetURLObject::CutLastName() 5482 { 5483 INetURLObject aTemp(*this); 5484 aTemp.clearFragment(); 5485 aTemp.clearQuery(); 5486 if (!aTemp.removeSegment(LAST_SEGMENT, false)) 5487 return false; 5488 *this = 
aTemp; 5489 return true; 5490 } 5491 5492 //============================================================================ 5493 rtl::OUString INetURLObject::PathToFileName() const 5494 { 5495 if (m_eScheme != INET_PROT_FILE) 5496 return rtl::OUString(); 5497 rtl::OUString aSystemPath; 5498 if (osl::FileBase::getSystemPathFromFileURL( 5499 decode(m_aAbsURIRef.getStr(), 5500 m_aAbsURIRef.getStr() + m_aPath.getEnd(), 5501 getEscapePrefix(), NO_DECODE, RTL_TEXTENCODING_UTF8), 5502 aSystemPath) 5503 != osl::FileBase::E_None) 5504 return rtl::OUString(); 5505 return aSystemPath; 5506 } 5507 5508 //============================================================================ 5509 rtl::OUString INetURLObject::GetFull() const 5510 { 5511 INetURLObject aTemp(*this); 5512 aTemp.removeFinalSlash(); 5513 return aTemp.PathToFileName(); 5514 } 5515 5516 //============================================================================ 5517 rtl::OUString INetURLObject::GetPath() const 5518 { 5519 INetURLObject aTemp(*this); 5520 aTemp.removeSegment(LAST_SEGMENT, true); 5521 aTemp.removeFinalSlash(); 5522 return aTemp.PathToFileName(); 5523 } 5524 5525 //============================================================================ 5526 void INetURLObject::SetBase(rtl::OUString const & rTheBase) 5527 { 5528 setBase(rTheBase, LAST_SEGMENT, true, ENCODE_ALL); 5529 } 5530 5531 //============================================================================ 5532 rtl::OUString INetURLObject::GetBase() const 5533 { 5534 return getBase(LAST_SEGMENT, true, DECODE_WITH_CHARSET); 5535 } 5536 5537 //============================================================================ 5538 void INetURLObject::SetName(rtl::OUString const & rTheName, 5539 EncodeMechanism eMechanism, 5540 rtl_TextEncoding eCharset) 5541 { 5542 INetURLObject aTemp(*this); 5543 if (aTemp.removeSegment(LAST_SEGMENT, true) 5544 && aTemp.insertName(rTheName, false, LAST_SEGMENT, true, eMechanism, 5545 eCharset)) 5546 *this = aTemp; 5547 
} 5548 5549 //============================================================================ 5550 rtl::OUString INetURLObject::CutName(DecodeMechanism eMechanism, 5551 rtl_TextEncoding eCharset) 5552 { 5553 rtl::OUString aTheName(getName(LAST_SEGMENT, true, eMechanism, eCharset)); 5554 return removeSegment(LAST_SEGMENT, true) ? aTheName : rtl::OUString(); 5555 } 5556 5557 //============================================================================ 5558 void INetURLObject::SetExtension(rtl::OUString const & rTheExtension, 5559 EncodeMechanism eMechanism, 5560 rtl_TextEncoding eCharset) 5561 { 5562 setExtension(rTheExtension, LAST_SEGMENT, false, eMechanism, eCharset); 5563 } 5564 5565 //============================================================================ 5566 rtl::OUString INetURLObject::CutExtension(DecodeMechanism eMechanism, 5567 rtl_TextEncoding eCharset) 5568 { 5569 rtl::OUString aTheExtension(getExtension(LAST_SEGMENT, false, eMechanism, 5570 eCharset)); 5571 return removeExtension(LAST_SEGMENT, false) 5572 ? aTheExtension : rtl::OUString(); 5573 } 5574 5575 //============================================================================ 5576 bool INetURLObject::IsCaseSensitive() const 5577 { 5578 return true; 5579 } 5580