1#************************************************************** 2# 3# Licensed to the Apache Software Foundation (ASF) under one 4# or more contributor license agreements. See the NOTICE file 5# distributed with this work for additional information 6# regarding copyright ownership. The ASF licenses this file 7# to you under the Apache License, Version 2.0 (the 8# "License"); you may not use this file except in compliance 9# with the License. You may obtain a copy of the License at 10# 11# http://www.apache.org/licenses/LICENSE-2.0 12# 13# Unless required by applicable law or agreed to in writing, 14# software distributed under the License is distributed on an 15# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16# KIND, either express or implied. See the License for the 17# specific language governing permissions and limitations 18# under the License. 19# 20#************************************************************** 21 22package ExtensionsLst; 23 24use File::Spec; 25use LWP::UserAgent; 26use Digest::MD5; 27 28use base 'Exporter'; 29our @EXPORT = qw(DownloadExtensions GetExtensionList); 30 31 32=head1 NAME 33 34 ExtensionLst.pm - Functionality for the interpretation of the main/extensions.lst file. 35 36=head1 SYNOPSIS 37 38 For downloading extensions during build setup: 39 40 use ExtensionsLst; 41 ExtensionsLst::DownloadExtensions(); 42 43 For including extensions into the pack set: 44 45 use ExtensionsLst; 46 ExtensionsLst::GetExtensionList(@language_list); 47 48=head1 DESCRIPTION 49 50 The contents of the extensions.lst file are used at two times in 51 the process of building pack sets. 52 53 Once at the beginning right after configure is run the 54 DownloadExtensions() function determines the list of extensions 55 that are not present locally and downloads them. 56 57 The second time is after all modules are built (and the locally 58 built extensions are present) and the pack sets are created. For 59 every language (or sets of lanugages) a set of extensions is 60 collected and included into the pack set. 61 62 The content of the extensions.lst file is ignored when the --with-extensions option is given to configure. 63 64=cut 65 66 67# Number of the line in extensions.lst that is currently being processed. 68my $LineNo = 0; 69 70# Set to 1 to get a more verbose output, the default is 0. 71my $Debug = 0; 72 73 74=head3 Prepare 75 Check that some environment variables are properly set and then return the file name 76 of the 'extensions.lst' file, typically located in main/ beside 'ooo.lst'. 77=cut 78sub Prepare () 79{ 80 die "can not access environment varianle SRC_ROOT" if ! defined $ENV{'SRC_ROOT'}; 81 die "can not determine the platform: INPATH is not set" if ! defined $ENV{'INPATH'}; 82 die "can not determine solver directory: OUTDIR is not set" if ! defined $ENV{'OUTDIR'}; 83 die "can not determine download directory: TARFILE_LOCATION is not set" if ! defined $ENV{'TARFILE_LOCATION'}; 84 85 my $candidate = File::Spec->catfile($ENV{SRC_ROOT}, "extensions.lst"); 86 die "can not read file $candidate" if ! -r $candidate; 87 return $candidate; 88} 89 90 91 92=head 3 EvaluateOperator 93 Evaluate a single test statement like 'language = en.*'. 94 Special handling for operators '=', '==', and 'eq' which are all mapped to '=~'. 95 Therefore the right hand side may be a perl regexp. It is prefixed with '^'. 96 97 Other operators are at the moment only supported in the way that they are evaluated via eval(). 98=cut 99sub EvaluateOperator ($$$) 100{ 101 my ($left,$operator,$right) = @_; 102 103 my $result; 104 105 if ($operator =~ /^(=|==|eq)$/) 106 { 107 if ($left =~ /^$right$/) 108 { 109 $result = 1; 110 } 111 else 112 { 113 $result = 0; 114 } 115 } 116 elsif (eval($left.$operator.$right)) 117 { 118 $result = 1; 119 } 120 else 121 { 122 $result = 0; 123 } 124 125 return $result; 126} 127 128 129 130 131=head EvaluateTerm 132 Evaluate a string that contains a simple test term of the form 133 left operator right 134 with arbitrary spacing allowed around and between the three parts. 135 136 The left hand side is specially handled: 137 138 - When the left hand side is 'language' then it is replaced by 139 any of the given languages in turn. When the term evaluates to true for any of the languages then 140 true is returned. False is returned only when none of the given languages matches. 141 142 - When the left hand side consists only of upper case letters, digits, and '_' then it is 143 interpreted as the name of a environment variable. It is replaced by its value before the term 144 is evaluated. 145 146 - Any other left hand side is an error (at the moment.) 147=cut 148sub EvaluateTerm ($$) 149{ 150 my $term = shift; 151 my $languages = shift; 152 153 my $result; 154 155 if ($term =~ /^\s*(\w+)\s*(\W+)\s*(.*?)\s*$/) 156 { 157 my ($left,$operator,$right) = ($1,$2,$3); 158 159 if ($operator !~ /^=|==|eq$/) 160 { 161 die "unsupported operator $operator on line $LineNo"; 162 } 163 164 die "no right side in condition on line $LineNo ($term)" if ! defined $right; 165 166 if ($left =~ /^[A-Z_0-9]+$/) 167 { 168 # Uppercase words are interpreted as environment variables. 169 my $left_value = $ENV{$left}; 170 $left_value = "" if ! defined $left_value; 171 172 # We can check whether the condition is fullfilled right now. 173 $result = EvaluateOperator($left_value, $operator, $right); 174 } 175 elsif ($left eq "language") 176 { 177 if ($right eq "all") 178 { 179 $result = 1; 180 } 181 elsif ($#$languages>=0) 182 { 183 $result = 0; 184 for my $language (@$languages) 185 { 186 # Unify naming schemes. 187 $language =~ s/_/-/g; 188 $right =~ s/_/-/g; 189 190 # Evaluate language regexp. 191 $result = EvaluateOperator($language, $operator, $right) ? 1 : 0; 192 last if $result; 193 } 194 } 195 else 196 { 197 # The set of languages is not yet known. Return true 198 # to include the following entries. 199 $result = 1; 200 } 201 } 202 elsif ($left eq "platform") 203 { 204 if ($right eq "all") 205 { 206 $result = 1; 207 } 208 else 209 { 210 # Evaluate platform regexp. 211 $result = EvaluateOperator($ENV{'INPATH'}, $operator, $right) ? 1 : 0; 212 } 213 } 214 else 215 { 216 die "can not handle left hand side $left on line $LineNo"; 217 } 218 } 219 else 220 { 221 die "syntax error in expression on line $LineNo"; 222 } 223 224 return $result; 225} 226 227 228 229 230=head3 EvaluateSelector 231 Evaluate the given expression that is expected to be list of terms of the form 232 left-hand-side operator right-hand-side 233 that are separated by logical operators 234 && || 235 The expression is lazy evaluated left to right. 236=cut 237sub EvaluateSelector($$); 238sub EvaluateSelector($$) 239{ 240 my $expression = shift; 241 my $languages = shift; 242 243 my $result = ""; 244 245 if ($expression =~ /^\s*$/) 246 { 247 # Empty selector is always true. 248 return 1; 249 } 250 elsif ($expression =~ /^\s*(.*?)(&&|\|\|)\s*(.*)$/) 251 { 252 my ($term, $operator) = ($1,$2); 253 $expression = $3; 254 255 my $left_result = EvaluateTerm($term, $languages); 256 # Lazy evaluation of && 257 return 0 if ($operator eq "&&" && !$left_result); 258 # Lazy evaluation of || 259 return 1 if ($operator eq "||" && $left_result); 260 my $right_result = EvaluateSelector($expression, $languages); 261 262 if ($operator eq "&&") 263 { 264 return $left_result && $right_result; 265 } 266 else 267 { 268 return $left_result || $right_result; 269 } 270 } 271 elsif ($expression =~ /^\s*(.+?)\s*$/) 272 { 273 return EvaluateTerm($1, $languages); 274 } 275 else 276 { 277 die "invalid expression syntax on line $LineNo ($expression)"; 278 } 279} 280 281 282 283 284=head3 ProcessURL 285 Check that the given line contains an optional MD5 sum followed by 286 a URL for one of the protocols file, http, https, 287 followed by an optional file name (which is necessary when it is not the last part of the URL.) 288 Return an array that contains the protocol, the name, the original 289 URL, and the MD5 sum from the beginning of the line. 290 The name of the URL depends on its protocol: 291 - for http(s) the part of the URL after the last '/'. 292 - for file URLS it is everything after the protocol:// 293=cut 294sub ProcessURL ($) 295{ 296 my $line = shift; 297 298 # Check that we are looking at a valid URL. 299 if ($line =~ /^\s*((\w{32})\s+)?([a-zA-Z]+)(:\/\/.*?\/)([^\/ \t]+)(\s+\"[^\"]+\")?\s*$/) 300 { 301 my ($md5, $protocol, $url_name, $optional_name) = ($2,$3,$5,$6); 302 my $URL = $3.$4.$5; 303 304 die "invalid URL protocol on line $LineNo:\n$line\n" if $protocol !~ /(file|http|https)/; 305 306 # Determine the name. If an optional name is given then use that. 307 if (defined $optional_name) 308 { 309 die if $optional_name !~ /^\s+\"([^\"]+)\"$/; 310 $name = $1; 311 } 312 else 313 { 314 if ($protocol eq "file") 315 { 316 # For file URLs we use everything after :// as name, or the . 317 $URL =~ /:\/\/(.*)$/; 318 $name = $1; 319 } 320 else 321 { 322 # For http and https use the last part of the URL. 323 $name = $url_name; 324 } 325 } 326 327 return [$protocol, $name, $URL, $md5]; 328 } 329 else 330 { 331 die "invalid URL at line $LineNo:\n$line\n"; 332 } 333} 334 335 336 337 338=head3 ParseExtensionsLst 339 Parse the extensions.lst file. 340 341 Lines that contain only spaces or comments or are empty are 342 ignored. 343 344 Lines that contain a selector, ie a test enclosed in brackets, are 345 evaluated. The following lines, until the next selector, are 346 ignored when the selector evaluates to false. When an empty list 347 of languages is given then any 'language=...' test is evaluated as 348 true. 349 350 All other lines are expected to contain a URL optionally preceded 351 by an MD5 sum. 352=cut 353sub ParseExtensionsLst ($$) 354{ 355 my $file_name = shift; 356 my $languages = shift; 357 358 open my $in, "$file_name"; 359 360 my $current_selector_value = 1; 361 my @URLs = (); 362 363 while (<$in>) 364 { 365 my $line = $_; 366 $line =~ s/[\r\n]+//g; 367 ++$LineNo; 368 369 # Strip away comments. 370 next if $line =~ /^\s*#/; 371 372 # Ignore empty lines. 373 next if $line =~ /^\s*$/; 374 375 # Process selectors 376 if ($line =~ /^\s*\[\s*(.*)\s*\]\s*$/) 377 { 378 $current_selector_value = EvaluateSelector($1, $languages); 379 } 380 else 381 { 382 if ($current_selector_value) 383 { 384 push @URLs, ProcessURL($line); 385 } 386 } 387 } 388 389 close $in; 390 391 return @URLs; 392} 393 394 395 396 397=head3 Download 398 Download a set of files that are specified via URLs. 399 400 File URLs are ignored here because they point to extensions that have not yet been built. 401 402 For http URLs there may be an optional MD5 checksum. If it is present then downloaded 403 files that do not match that checksum are an error and lead to abortion of the current process. 404 Files that have already been downloaded are not downloaded again. 405=cut 406sub Download (@) 407{ 408 my @urls = @_; 409 410 my @missing = (); 411 my $download_path = $ENV{'TARFILE_LOCATION'}; 412 413 # First check which (if any) extensions have already been downloaded. 414 for my $entry (@urls) 415 { 416 my ($protocol, $name, $URL, $md5sum) = @{$entry}; 417 418 # We can not check the existence of file URLs because they point to extensions that 419 # have yet to be built. 420 421 next if $protocol ne "http"; 422 my $candidate = File::Spec->catfile($download_path, $name); 423 if ( ! -f $candidate) 424 { 425 push @missing, $entry; 426 } 427 } 428 if ($#missing >= 0) 429 { 430 printf "downloading %d missing extension%s\n", $#missing+1, $#missing>0 ? "s" : ""; 431 if ( ! -d $download_path) 432 { 433 mkdir File::Spec->catpath($download_path, "tmp") 434 || die "can not create tmp subdirectory of $download_path"; 435 } 436 } 437 else 438 { 439 print "all downloadable extensions present\n"; 440 return; 441 } 442 443 # Download the missing files. 444 for my $entry (@missing) 445 { 446 my ($protocol, $name, $URL, $md5sum) = @{$entry}; 447 448 # Open a .part file for writing. 449 my $filename = File::Spec->catfile($download_path, $name); 450 my $temporary_filename = $filename . ".part"; 451 print "downloading to $temporary_filename\n"; 452 open my $out, ">$temporary_filename"; 453 binmode($out); 454 455 # Prepare md5 456 my $md5 = Digest::MD5->new(); 457 458 # Download the extension. 459 my $agent = LWP::UserAgent->new(); 460 $agent->timeout(120); 461 $agent->show_progress(1); 462 my $last_was_redirect = 0; 463 $agent->add_handler('response_redirect' 464 => sub{ 465 $last_was_redirect = 1; 466 return; 467 }); 468 $agent->add_handler('response_data' 469 => sub{ 470 if ($last_was_redirect) 471 { 472 $last_was_redirect = 0; 473 # Throw away the data we got so far. 474 $md5->reset(); 475 close $out; 476 open $out, ">$temporary_filename"; 477 binmode($out); 478 } 479 my($response,$agent,$h,$data)=@_; 480 print $out $data; 481 $md5->add($data); 482 }); 483 my $response = $agent->get($URL); 484 close $out; 485 486 # When download was successfull then check the md5 checksum and rename the .part file 487 # into the actual extension name. 488 if ($response->is_success()) 489 { 490 if (defined $md5sum && length($md5sum)==32) 491 { 492 my $file_md5 = $md5->hexdigest(); 493 if ($md5sum eq $file_md5) 494 { 495 print "md5 is OK\n"; 496 } 497 else 498 { 499 unlink($temporary_filename) if ! $Debug; 500 die "downloaded file has the wrong md5 checksum: $file_md5 instead of $md5sum"; 501 } 502 } 503 else 504 { 505 print "md5 is not present\n"; 506 printf " is %s, length is %d\n", $md5sum, length(md5sum); 507 } 508 509 rename($temporary_filename, $filename) || die "can not rename $temporary_filename to $filename"; 510 } 511 else 512 { 513 die "failed to download $URL"; 514 } 515 } 516} 517 518 519 520 521=head3 DownloadExtensions 522 This function is intended to be called during bootstrapping. It extracts the set of extensions 523 that will be used later, when the installation sets are built. 524 The set of languages is taken from the WITH_LANG environment variable. 525=cut 526sub DownloadExtensions () 527{ 528 if (defined $ENV{'ENABLE_BUNDLED_DICTIONARIES'} 529 && $ENV{'ENABLE_BUNDLED_DICTIONARIES'} eq "YES") 530 { 531 my $full_file_name = Prepare(); 532 my $languages = [ "en_US" ]; 533 if (defined $ENV{'WITH_LANG'}) 534 { 535 @$languages = split(/\s+/, $ENV{'WITH_LANG'}); 536 foreach my $l (@$languages) 537 { 538 print "$l\n"; 539 } 540 } 541 my @urls = ParseExtensionsLst($full_file_name, $languages); 542 Download(@urls); 543 } 544 else 545 { 546 print "bundling of dictionaries is disabled.\n"; 547 } 548} 549 550 551 552 553=head3 GetExtensionList 554 This function is intended to be called when installation sets are built. 555 It expects two arguments: 556 - A protocol selector. Http URLs reference remotely located 557 extensions that will be bundled as-is into the installation 558 sets due to legal reasons. They are installed on first start 559 of the office. 560 File URLs reference extensions whose source code is part of 561 the repository. They are pre-registered when installation 562 sets are created. Their installation is finished when the 563 office is first started. 564 - A set of languages. This set determines which extensions 565 are returned and then included in an installation set. 566=cut 567sub GetExtensionList ($@) 568{ 569 my $protocol_selector = shift; 570 my @language_list = @_; 571 572 if (defined $ENV{'ENABLE_BUNDLED_DICTIONARIES'} 573 && $ENV{'ENABLE_BUNDLED_DICTIONARIES'} eq "YES") 574 { 575 my $full_file_name = Prepare(); 576 my @urls = ParseExtensionsLst($full_file_name, \@language_list); 577 578 my @result = (); 579 for my $entry (@urls) 580 { 581 my ($protocol, $name, $URL, $md5sum) = @{$entry}; 582 if ($protocol =~ /^$protocol_selector$/) 583 { 584 push @result, $name; 585 } 586 } 587 588 return @result; 589 } 590 else 591 { 592 # Bundling of dictionaires is disabled. 593 } 594 595 return (); 596} 597 598 5991; 600