1: # -*- perl -*- vim: ft=perl 2eval 'exec perl -w -S $0 ${1+"$@"}' 3if 0; 4#************************************************************** 5# 6# Licensed to the Apache Software Foundation (ASF) under one 7# or more contributor license agreements. See the NOTICE file 8# distributed with this work for additional information 9# regarding copyright ownership. The ASF licenses this file 10# to you under the Apache License, Version 2.0 (the 11# "License"); you may not use this file except in compliance 12# with the License. You may obtain a copy of the License at 13# 14# http://www.apache.org/licenses/LICENSE-2.0 15# 16# Unless required by applicable law or agreed to in writing, 17# software distributed under the License is distributed on an 18# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 19# KIND, either express or implied. See the License for the 20# specific language governing permissions and limitations 21# under the License. 22# 23#************************************************************** 24 25 26 27# See Usage() below or invoke without arguments for short instructions. 28# For long instructions use the source, Luke ;-) 29 30use strict; 31 32sub Usage() 33{ 34 print STDERR 35 "\n", 36 "langid - a hackish utility to lookup lang.h language defines and LangIDs,\n", 37 "isolang.cxx ISO639/ISO3166 mapping, locale data files, langtab.src language\n", 38 "listbox entries, postset.mk, file_ooo.scp registry name, globals.pm and\n", 39 "msi-encodinglist.txt\n\n", 40 41 "Usage: $0 [--single] {language string} | {LangID} | {primarylanguage sublanguage} | {language-country}\n\n", 42 43 "A language string will be used as a generic string match in all searched files.\n", 44 "You may enclose the language string in word delimiters,\n", 45 "e.g. \\blanguage_german\\b for a specific match.\n", 46 "If the language string expression matches more than one define,\n", 47 "e.g. as in 'german', all matching defines will be processed.\n", 48 "If the language string does not match a define or an identifier in\n", 49 "langtab.src, a generic string match of the listbox entries will be tried.\n\n", 50 51 "Numeric values of LangID,primarylanguage,sublanguage can be given\n", 52 "decimal, hexagesimal (leading 0x), octal (leading 0) or binary (leading 0b).\n", 53 "The exact language_define of an exact match will be used in remaining lookups.\n\n", 54 55 "A language-country pair will lookup a xx-YY mapping from isolang.cxx,\n", 56 "for example: 'en-US' or 'de-' or '-CH',\n", 57 "xx and YY can be given case insensitive, will be lowered-uppered internally,\n", 58 "and xx and YY themselfs may be regular expressions.\n", 59 "Also here a list of matches will be processed.\n\n", 60 61 "If option --single is given, only the first match will be processed.\n\n"; 62} 63 64my $SOLARVERSION = $ENV{"SOLARVERSION"}; 65my $INPATH = $ENV{"INPATH"}; 66my $SRC_ROOT = $ENV{"SRC_ROOT"}; 67my $UPDMINOREXT = $ENV{"UPDMINOREXT"}; 68if (!defined($SOLARVERSION) || !defined($INPATH) || !defined($SRC_ROOT)) 69{ 70 print "\nNeed \$SOLARVERSION, \$INPATH and \$SRC_ROOT, please set your OOo environment!\n"; 71 Usage(); 72 exit 1; 73} 74if (!defined($UPDMINOREXT)) { 75 $UPDMINOREXT = ''; 76} 77my $SOLENVINC = "$SOLARVERSION/$INPATH/inc$UPDMINOREXT"; 78 79my $LANGUAGE_MASK_PRIMARY = 0x03ff; 80 81sub getPrimaryLanguage($) 82{ 83 my($lcid) = @_; 84 return $lcid & $LANGUAGE_MASK_PRIMARY; 85} 86 87sub getSubLanguage($) 88{ 89 my($lcid) = @_; 90 return $lcid >> 10; 91} 92 93sub makeLangID($$) 94{ 95 my( $sub, $pri) = @_; 96 return ($sub << 10) | $pri; 97} 98 99 100sub grepFile($$$$@) 101{ 102 my( $regex, $path, $module, $name, @addregex) = @_; 103 my @result; 104 my $found = 0; 105 my $areopen = 0; 106 my $arecloser = ''; 107 my $file; 108 # Try module under current working directory first to catch local 109 # modifications. A Not yet delivered lang.h is a special case. 110 if ("$path/$module/$name" eq "$SOLENVINC/i18npool/lang.h") { 111 $file = "./$module/inc/i18npool/lang.h"; } 112 else { 113 $file = "./$module/$name"; } 114 if (!($found = open( IN, $file))) 115 { 116 # Then with the given path. 117 $file = "$path/$module/$name"; 118 if (!($found = open( IN, $file))) 119 { 120 print "No $file\n"; 121 $file = "$path/$module.lnk/$name"; 122 if (!($found = open( IN, $file))) { 123 print "No $file.\n"; 124 $file = "$path/$module.link/$name"; 125 if (!($found = open( IN, $file))) { 126 print "No $file either.\n"; } 127 } 128 } 129 } 130 if ($found) 131 { 132 $found = 0; 133 while (my $line = <IN>) 134 { 135 if ($line =~ /$regex/) 136 { 137 if (!$found) 138 { 139 $found = 1; 140 print "$file:\n"; 141 } 142 chomp( $line); 143 print "$line\n"; 144 push( @result, $line); 145 } 146 elsif (@addregex) 147 { 148 # By convention first element is opener, second element is closer. 149 if (!$areopen) 150 { 151 if ($line =~ /$addregex[0]/) 152 { 153 $areopen = 1; 154 $arecloser = $addregex[1]; 155 } 156 } 157 if ($areopen) 158 { 159 for (my $i = 2; $i < @addregex; ++$i) 160 { 161 if ($line =~ /$addregex[$i]/) 162 { 163 if (!$found) 164 { 165 $found = 1; 166 print "$file:\n"; 167 } 168 chomp( $line); 169 print "$line\n"; 170 push( @result, $line); 171 } 172 } 173 if ($line =~ /$arecloser/) 174 { 175 $areopen = 0; 176 } 177 } 178 } 179 } 180 close( IN); 181 } 182 if (!$found) { 183 print "Not found in $file\n"; 184 #print "Not found in $file for $regex @addregex\n"; 185 } 186 return @result; 187} 188 189 190sub main() 191{ 192 my( $lcid, @parts, $grepdef, $options, $single); 193 $grepdef = 0; 194 $single = 0; 195 for ($options = 0; $options < @ARGV && $ARGV[$options] =~ /^--/; ++$options) 196 { 197 if ($ARGV[$options] eq '--single') { $single = 1; } 198 else { print "Unknown option: $ARGV[$options]\n"; } 199 } 200 if (@ARGV == 1 + $options) 201 { 202 # 0x hex, 0b bin, 0 oct 203 if ($ARGV[$options] =~ /^0/) { 204 $lcid = oct( $ARGV[0]); } 205 elsif ($ARGV[$options] =~ /^[0-9]/) { 206 $lcid = $ARGV[$options]; } 207 else 208 { 209 $grepdef = $ARGV[$options]; 210 $lcid = 0; 211 } 212 $parts[0] = getPrimaryLanguage( $lcid); 213 $parts[1] = getSubLanguage( $lcid); 214 } 215 elsif (@ARGV == 2 + $options) 216 { 217 for (my $i = $options; $i < 2 + $options; ++$i) 218 { 219 if ($ARGV[$i] =~ /^0/) { 220 $parts[$i] = oct( $ARGV[$i]); } 221 else { 222 $parts[$i] = $ARGV[$i]; } 223 } 224 $lcid = makeLangID( $parts[1], $parts[0]); 225 } 226 else 227 { 228 Usage(); 229 return 1; 230 } 231 my $modifier = "(?i)"; 232 my (@resultlist, @greplist, $result); 233 # If no string was given on the command line, but value(s) were, lookup the 234 # LangID value to obtain the define identifier. 235 if ($grepdef) 236 { 237 # #define LANGUAGE_AFRIKAANS 0x0436 238 @resultlist = grepFile( 239 $modifier . '^\s*#\s*define\s+[A-Z_]*' . $grepdef, 240 $SOLENVINC, "i18npool", "lang.h", ()); 241 } 242 else 243 { 244 printf( "LangID: 0x%04X (dec %d), primary: 0x%03x, sub 0x%02x\n", $lcid, 245 $lcid, $parts[0], $parts[1]); 246 my $buf = sprintf( "0x%04X", $lcid); 247 @resultlist = grepFile( 248 '^\s*#\s*define\s+\w+\s+' . $buf, 249 $SOLENVINC, "i18npool", "lang.h", ()); 250 } 251 for $result (@resultlist) 252 { 253 # #define LANGUAGE_AFRIKAANS 0x0436 254 if ($result =~ /^\s*#\s*define\s+(\w+)\s+(0x[0-9a-fA-F]+)/) 255 { 256 push( @greplist, '\b' . $1 . '\b'); 257 $modifier = ""; # complete identifier now case sensitive 258 if ($single) { 259 last; } 260 } 261 } 262 # If the string given is of the form xx-yy lookup a language,country pair 263 # to obtain the define identifier. xx and yy themselfs may be regexps. 264 # xx- is a short form for 'xx-.*' and -yy a short form for '.*-yy' 265 if ($grepdef =~ /^(.*)-$/) { 266 $grepdef = $1 . "-.*"; } 267 if ($grepdef =~ /^-(.*)$/) { 268 $grepdef = ".*-" . $1; } 269 if ($grepdef =~ /^(.*)-(.*)$/) 270 { 271 my $lang = $1; 272 my $coun = $2; 273 $lang = lc($lang); 274 $coun = uc($coun); 275 # { LANGUAGE_AFRIKAANS, "af", "ZA" }, 276 @resultlist = grepFile( 277 '^\s*\{\s*\w+\s*,\s*\"' . $lang . '\"\s*,\s*\"' . $coun . '\"\s*\}\s*,', 278 "$SRC_ROOT", "i18npool", "source/isolang/isolang.cxx", ()); 279 for $result (@resultlist) 280 { 281 if ($result =~ /^\s*\{\s*(\w+)\s*,\s*\"\w+\"\s*,\s*\"(\w+)?\"\s*\}\s*,/) 282 { 283 push( @greplist, '\b' . $1 . '\b'); 284 $modifier = ""; # complete identifier now case sensitive 285 if ($single) { 286 last; } 287 } 288 } 289 $grepdef = 0; 290 } 291 if (!@greplist && $grepdef) { 292 push( @greplist, $grepdef); } 293 for $grepdef (@greplist) 294 { 295 print "\nUsing: " . $grepdef . "\n"; 296 297 # Decimal LCID, was needed for Langpack.ulf but isn't used anymore, 298 # keep just in case we'd need it again. 299 # #define LANGUAGE_AFRIKAANS 0x0436 300 @resultlist = grepFile( 301 $modifier . '^\s*#\s*define\s+[A-Z_]*' . $grepdef, 302 $SOLENVINC, "i18npool", "lang.h", ()); 303 my @lcidlist; 304 for $result (@resultlist) 305 { 306 # #define LANGUAGE_AFRIKAANS 0x0436 307 if ($result =~ /^\s*#\s*define\s+(\w+)\s+(0x[0-9a-fA-F]+)/) 308 { 309 push( @lcidlist, oct( $2)); 310 } 311 } 312 313 # { LANGUAGE_AFRIKAANS, "af", "ZA" }, 314 @resultlist = grepFile( 315 $modifier . '^\s*\{\s*.*' . $grepdef . '.*\s*,\s*\".*\"\s*,\s*\".*\"\s*\}\s*,', 316 "$SRC_ROOT", "i18npool", "source/isolang/isolang.cxx", ()); 317 318 my @langcoungreplist; 319 for $result (@resultlist) 320 { 321 if ($result =~ /^\s*\{\s*\w+\s*,\s*\"(\w+)\"\s*,\s*\"(\w+)?\"\s*\}\s*,/) 322 { 323 my $lang = $1; 324 my $coun = $2; 325 my $loca; 326 if ($coun) 327 { 328 $loca = $lang . "_" . $coun; 329 push( @langcoungreplist, '\b' . $lang . '\b(-' . $coun . ')?'); 330 } 331 else 332 { 333 $loca = $lang; 334 $coun = ""; 335 push( @langcoungreplist, '\b' . $lang . '\b'); 336 } 337 my $file = "$SRC_ROOT/i18npool/source/localedata/data/$loca.xml"; 338 my $found; 339 if (!($found = open( LD, $file))) 340 { 341 $file = "$SRC_ROOT/i18npool.lnk/source/localedata/data/$loca.xml"; 342 if (!($found = open( LD, $file))) 343 { 344 $file = "$SRC_ROOT/i18npool.link/source/localedata/data/$loca.xml"; 345 $found = open( LD, $file); 346 } 347 } 348 if ($found) 349 { 350 print "Found $file:\n"; 351 my $on = 0; 352 while (my $line = <LD>) 353 { 354 if ($line =~ /<(Language|Country)>/) { 355 $on = 1; } 356 if ($on) { 357 print $line; } 358 if ($line =~ /<\/(Language|Country)>/) { 359 $on = 0; } 360 } 361 close( LD); 362 } 363 else { 364 print "No $SRC_ROOT/i18npool/source/localedata/data/$loca.xml\n"; } 365 } 366 } 367 368 # case LANGUAGE_ARABIC: 369 grepFile( 370 $modifier . '^\s*case\s*.*' . $grepdef . '.*\s*:', 371 "$SRC_ROOT", "i18npool", "source/isolang/mslangid.cxx", ()); 372 373 # With CWS 'langstatusbar' the language listbox resource file gets a new location. 374 my $module = "svx"; 375 my $name = "source/dialog/langtab.src"; 376 if (!(-e "$SRC_ROOT/$module/$name")) { 377 $module = "svtools"; 378 $name = "source/misc/langtab.src"; 379 } 380 # < "Afrikaans" ; LANGUAGE_AFRIKAANS ; > ; 381 # lookup define 382 @resultlist = grepFile( 383 $modifier . '^\s*<\s*\".*\"\s*;\s*.*' . $grepdef . '.*\s*;\s*>\s*;', 384 "$SRC_ROOT", $module, $name, ()); 385 # lookup string 386 if (!@resultlist) { 387 grepFile( 388 $modifier . '^\s*<\s*\".*' . $grepdef . '.*\"\s*;\s*.*\s*;\s*>\s*;', 389 "$SRC_ROOT", $module, $name, ()); } 390 391 for my $langcoun (@langcoungreplist) 392 { 393 # Name (xxx) = "/registry/spool/org/openoffice/Office/Common-ctl.xcu"; 394 grepFile( 395 '^\s*Name\s*\(' . $langcoun . '\)\s*=', 396 "$SRC_ROOT", "scp2", "source/ooo/file_ooo.scp", ()); 397 398 # completelangiso=af ar as-IN ... zu 399 grepFile( 400 '^\s*completelangiso\s*=\s*(\s*([a-z]{2,3})(-[A-Z][A-Z])?)*' . $langcoun . '', 401 "$SRC_ROOT", "solenv", "inc/postset.mk", 402 # needs a duplicated pair of backslashes to produce a literal \\ 403 ('^\s*completelangiso\s*=', '^\s*$', '^\s*' . $langcoun . '\s*\\\\*$')); 404 405 # @noMSLocaleLangs = ( "br", "bs", ... ) 406 grepFile( 407 '^\s*@noMSLocaleLangs\s*=\s*\(\s*(\s*"([a-z]{2,3})(-[A-Z][A-Z])?"\s*,?)*' . $langcoun . '', 408 "$SRC_ROOT", "solenv", "bin/modules/installer/globals.pm", 409 ('^\s*@noMSLocaleLangs\s*=', '\)\s*$', '"' . $langcoun . '"')); 410 411 # af 1252 1078 # Afrikaans 412 grepFile( 413 '^\s*' . $langcoun . '', 414 "$SRC_ROOT", "setup_native", "source/win32/msi-encodinglist.txt", ()); 415 } 416 } 417 return 0; 418} 419 420main(); 421