1#**************************************************************
2#
3#  Licensed to the Apache Software Foundation (ASF) under one
4#  or more contributor license agreements.  See the NOTICE file
5#  distributed with this work for additional information
6#  regarding copyright ownership.  The ASF licenses this file
7#  to you under the Apache License, Version 2.0 (the
8#  "License"); you may not use this file except in compliance
9#  with the License.  You may obtain a copy of the License at
10#
11#    http://www.apache.org/licenses/LICENSE-2.0
12#
13#  Unless required by applicable law or agreed to in writing,
14#  software distributed under the License is distributed on an
15#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16#  KIND, either express or implied.  See the License for the
17#  specific language governing permissions and limitations
18#  under the License.
19#
20#**************************************************************
21
22package ExtensionsLst;
23
24use File::Spec;
25use LWP::UserAgent;
26use Digest::MD5;
27
28use base 'Exporter';
29our @EXPORT = qw(DownloadExtensions GetExtensionList);
30
31
32=head1 NAME
33
34    ExtensionLst.pm - Functionality for the interpretation of the main/extensions.lst file.
35
36=head1 SYNOPSIS
37
38    For downloading extensions during build setup:
39
40    use ExtensionsLst;
41    ExtensionsLst::DownloadExtensions();
42
43    For including extensions into the pack set:
44
45    use ExtensionsLst;
46    ExtensionsLst::GetExtensionList(@language_list);
47
48=head1 DESCRIPTION
49
50    The contents of the extensions.lst file are used at two times in
51    the process of building pack sets.
52
53    Once at the beginning right after configure is run the
54    DownloadExtensions() function determines the list of extensions
55    that are not present locally and downloads them.
56
57    The second time is after all modules are built (and the locally
58    built extensions are present) and the pack sets are created.  For
59    every language (or sets of lanugages) a set of extensions is
60    collected and included into the pack set.
61
62    The content of the extensions.lst file is ignored when the --with-extensions option is given to configure.
63
64=cut
65
66
67# Number of the line in extensions.lst that is currently being processed.
68my $LineNo = 0;
69
70# Set to 1 to get a more verbose output, the default is 0.
71my $Debug = 0;
72
73
74=head3 Prepare
75    Check that some environment variables are properly set and then return the file name
76    of the 'extensions.lst' file, typically located in main/ beside 'ooo.lst'.
77=cut
78sub Prepare ()
79{
80    die "can not access environment varianle SRC_ROOT" if ! defined $ENV{'SRC_ROOT'};
81    die "can not determine the platform: INPATH is not set" if ! defined $ENV{'INPATH'};
82    die "can not determine solver directory: OUTDIR is not set" if ! defined $ENV{'OUTDIR'};
83    die "can not determine download directory: TARFILE_LOCATION is not set" if ! defined $ENV{'TARFILE_LOCATION'};
84
85    my $candidate = File::Spec->catfile($ENV{SRC_ROOT}, "extensions.lst");
86    die "can not read file $candidate" if ! -r $candidate;
87    return $candidate;
88}
89
90
91
92=head 3 EvaluateOperator
93    Evaluate a single test statement like 'language = en.*'.
94    Special handling for operators '=', '==', and 'eq' which are all mapped to '=~'.
95    Therefore the right hand side may be a perl regexp.  It is prefixed with '^'.
96
97    Other operators are at the moment only supported in the way that they are evaluated via eval().
98=cut
99sub EvaluateOperator ($$$)
100{
101    my ($left,$operator,$right) = @_;
102
103    my $result;
104
105    if ($operator =~ /^(=|==|eq)$/)
106    {
107        if ($left =~ /^$right$/)
108        {
109            $result = 1;
110        }
111        else
112        {
113            $result = 0;
114        }
115    }
116    elsif (eval($left.$operator.$right))
117    {
118        $result = 1;
119    }
120    else
121    {
122        $result = 0;
123    }
124
125    return $result;
126}
127
128
129
130
131=head EvaluateTerm
132    Evaluate a string that contains a simple test term of the form
133    left operator right
134    with arbitrary spacing allowed around and between the three parts.
135
136    The left hand side is specially handled:
137
138    - When the left hand side is 'language' then it is replaced by
139    any of the given languages in turn.  When the term evaluates to true for any of the languages then
140    true is returned.  False is returned only when none of the given languages matches.
141
142    - When the left hand side consists only of upper case letters, digits, and '_' then it is
143    interpreted as the name of a environment variable.  It is replaced by its value before the term
144    is evaluated.
145
146    - Any other left hand side is an error (at the moment.)
147=cut
148sub EvaluateTerm ($$)
149{
150    my $term = shift;
151    my $languages = shift;
152
153    my $result;
154
155    if ($term =~ /^\s*(\w+)\s*(\W+)\s*(.*?)\s*$/)
156    {
157        my ($left,$operator,$right) = ($1,$2,$3);
158
159        if ($operator !~ /^=|==|eq$/)
160        {
161            die "unsupported operator $operator on line $LineNo";
162        }
163
164        die "no right side in condition on line $LineNo ($term)" if ! defined $right;
165
166        if ($left =~ /^[A-Z_0-9]+$/)
167        {
168            # Uppercase words are interpreted as environment variables.
169            my $left_value = $ENV{$left};
170            $left_value = "" if ! defined $left_value;
171
172            # We can check whether the condition is fullfilled right now.
173            $result = EvaluateOperator($left_value, $operator, $right);
174        }
175        elsif ($left eq "language")
176        {
177            if ($right eq "all")
178            {
179                $result = 1;
180            }
181            elsif ($#$languages>=0)
182            {
183                $result = 0;
184                for my $language (@$languages)
185                {
186                    # Unify naming schemes.
187                    $language =~ s/_/-/g;
188                    $right =~ s/_/-/g;
189
190                    # Evaluate language regexp.
191                    $result = EvaluateOperator($language, $operator, $right) ? 1 : 0;
192                    last if $result;
193                }
194            }
195            else
196            {
197                # The set of languages is not yet known.  Return true
198                # to include the following entries.
199                $result = 1;
200            }
201        }
202        elsif ($left eq "platform")
203        {
204            if ($right eq "all")
205            {
206                $result = 1;
207            }
208            else
209            {
210                # Evaluate platform regexp.
211                $result = EvaluateOperator($ENV{'INPATH'}, $operator, $right) ? 1 : 0;
212            }
213        }
214        else
215        {
216            die "can not handle left hand side $left on line $LineNo";
217        }
218    }
219    else
220    {
221        die "syntax error in expression on line $LineNo";
222    }
223
224    return $result;
225}
226
227
228
229
230=head3 EvaluateSelector
231    Evaluate the given expression that is expected to be list of terms of the form
232        left-hand-side operator right-hand-side
233    that are separated by logical operators
234        && ||
235    The expression is lazy evaluated left to right.
236=cut
237sub EvaluateSelector($$);
238sub EvaluateSelector($$)
239{
240    my $expression = shift;
241    my $languages = shift;
242
243    my $result = "";
244
245    if ($expression =~ /^\s*$/)
246    {
247        # Empty selector is always true.
248        return 1;
249    }
250    elsif ($expression =~ /^\s*(.*?)(&&|\|\|)\s*(.*)$/)
251    {
252        my ($term, $operator) = ($1,$2);
253        $expression = $3;
254
255        my $left_result = EvaluateTerm($term, $languages);
256        # Lazy evaluation of &&
257        return 0 if ($operator eq "&&" && !$left_result);
258        # Lazy evaluation of ||
259        return 1 if ($operator eq "||" && $left_result);
260        my $right_result = EvaluateSelector($expression, $languages);
261
262        if ($operator eq "&&")
263        {
264            return $left_result && $right_result;
265        }
266        else
267        {
268            return $left_result || $right_result;
269        }
270    }
271    elsif ($expression =~ /^\s*(.+?)\s*$/)
272    {
273        return EvaluateTerm($1, $languages);
274    }
275    else
276    {
277        die "invalid expression syntax on line $LineNo ($expression)";
278    }
279}
280
281
282
283
284=head3 ProcessURL
285    Check that the given line contains an optional MD5 sum followed by
286    a URL for one of the protocols file, http, https,
287    followed by an optional file name (which is necessary when it is not the last part of the URL.)
288    Return an array that contains the protocol, the name, the original
289    URL, and the MD5 sum from the beginning of the line.
290    The name of the URL depends on its protocol:
291    - for http(s) the part of the URL after the last '/'.
292    - for file URLS it is everything after the protocol://
293=cut
294sub ProcessURL ($)
295{
296    my $line = shift;
297
298    # Check that we are looking at a valid URL.
299    if ($line =~ /^\s*((\w{32})\s+)?([a-zA-Z]+)(:\/\/.*?\/)([^\/ \t]+)(\s+\"[^\"]+\")?\s*$/)
300    {
301        my ($md5, $protocol, $url_name, $optional_name) = ($2,$3,$5,$6);
302        my $URL = $3.$4.$5;
303
304        die "invalid URL protocol on line $LineNo:\n$line\n" if $protocol !~ /(file|http|https)/;
305
306        # Determine the name.  If an optional name is given then use that.
307        if (defined $optional_name)
308        {
309            die if $optional_name !~ /^\s+\"([^\"]+)\"$/;
310            $name = $1;
311        }
312        else
313        {
314            if ($protocol eq "file")
315            {
316                # For file URLs we use everything after :// as name, or the .
317                $URL =~ /:\/\/(.*)$/;
318                $name = $1;
319            }
320            else
321            {
322                # For http and https use the last part of the URL.
323                $name = $url_name;
324            }
325        }
326
327        return [$protocol, $name, $URL, $md5];
328    }
329    else
330    {
331        die "invalid URL at line $LineNo:\n$line\n";
332    }
333}
334
335
336
337
338=head3 ParseExtensionsLst
339    Parse the extensions.lst file.
340
341    Lines that contain only spaces or comments or are empty are
342    ignored.
343
344    Lines that contain a selector, ie a test enclosed in brackets, are
345    evaluated.  The following lines, until the next selector, are
346    ignored when the selector evaluates to false.  When an empty list
347    of languages is given then any 'language=...' test is evaluated as
348    true.
349
350    All other lines are expected to contain a URL optionally preceded
351    by an MD5 sum.
352=cut
353sub ParseExtensionsLst ($$)
354{
355    my $file_name = shift;
356    my $languages = shift;
357
358    open my $in, "$file_name";
359
360    my $current_selector_value = 1;
361    my @URLs = ();
362
363    while (<$in>)
364    {
365        my $line = $_;
366        $line =~ s/[\r\n]+//g;
367        ++$LineNo;
368
369        # Strip away comments.
370        next if $line =~ /^\s*#/;
371
372        # Ignore empty lines.
373        next if $line =~ /^\s*$/;
374
375        # Process selectors
376        if ($line =~ /^\s*\[\s*(.*)\s*\]\s*$/)
377        {
378            $current_selector_value = EvaluateSelector($1, $languages);
379        }
380        else
381        {
382            if ($current_selector_value)
383            {
384                push @URLs, ProcessURL($line);
385            }
386        }
387    }
388
389    close $in;
390
391    return @URLs;
392}
393
394
395
396
397=head3 Download
398    Download a set of files that are specified via URLs.
399
400    File URLs are ignored here because they point to extensions that have not yet been built.
401
402    For http URLs there may be an optional MD5 checksum.  If it is present then downloaded
403    files that do not match that checksum are an error and lead to abortion of the current process.
404    Files that have already been downloaded are not downloaded again.
405=cut
406sub Download (@)
407{
408    my @urls = @_;
409
410    my @missing = ();
411    my $download_path = $ENV{'TARFILE_LOCATION'};
412
413    # First check which (if any) extensions have already been downloaded.
414    for my $entry (@urls)
415    {
416        my ($protocol, $name, $URL, $md5sum) = @{$entry};
417
418        # We can not check the existence of file URLs because they point to extensions that
419        # have yet to be built.
420
421        next if $protocol !~ /(http|https)/;
422        my $candidate = File::Spec->catfile($download_path, $name);
423        if ( ! -f $candidate)
424        {
425            push @missing, $entry;
426        }
427        elsif (defined $md5sum)
428        {
429            # Check that the MD5 sum is still correct.
430            # The datafile may have been updated with a new version of the extension that
431            # still has the same name but a different MD5 sum.
432            my $cur_oxt;
433            if ( ! open($cur_oxt, $candidate))
434            {
435                # Can not read the extension.  Download extension again.
436                push @missing, $entry;
437                unlink($candidate);
438            }
439            binmode($cur_oxt);
440            my $file_md5 = Digest::MD5->new->addfile(*$cur_oxt)->hexdigest;
441            close($cur_oxt);
442            if ($md5sum ne $file_md5)
443            {
444                # MD5 does not match.  Download extension again.
445                print "extension $name has wrong MD5 and will be updated\n";
446                push @missing, $entry;
447                unlink($candidate);
448            }
449        }
450    }
451    if ($#missing >= 0)
452    {
453        printf "downloading/updating %d extension%s\n", $#missing+1, $#missing>0 ? "s" : "";
454        if ( ! -d $download_path)
455        {
456            mkdir File::Spec->catpath($download_path, "tmp")
457                || die "can not create tmp subdirectory of $download_path";
458        }
459    }
460    else
461    {
462        print "all downloadable extensions present\n";
463        return;
464    }
465
466    # Download the missing files.
467    for my $entry (@missing)
468    {
469        my ($protocol, $name, $URL, $md5sum) = @{$entry};
470
471        # Open a .part file for writing.
472        my $filename = File::Spec->catfile($download_path, $name);
473        my $temporary_filename = $filename . ".part";
474        print "downloading to $temporary_filename\n";
475
476        # Prepare md5
477        my $md5 = Digest::MD5->new();
478
479        # Download the extension.
480        my $agent = LWP::UserAgent->new();
481        $agent->timeout(120);
482        $agent->env_proxy;
483        my $last_was_redirect = 0;
484        my $response = $agent->get($URL);
485
486        # When download was successfull then check the md5 checksum and rename the .part file
487        # into the actual extension name.
488        if ($response->is_success())
489        {
490            my $content = $response->content;
491            open $out, ">$temporary_filename";
492            binmode($out);
493            print $out $content;
494            $md5->add($content);
495            close $out;
496            if (defined $md5sum && length($md5sum)==32)
497            {
498                my $file_md5 = $md5->hexdigest();
499                if ($md5sum eq $file_md5)
500                {
501                    print "md5 is OK\n";
502                }
503                else
504                {
505                    unlink($temporary_filename) if ! $Debug;
506                    die "downloaded file has the wrong md5 checksum: $file_md5 instead of $md5sum";
507                }
508            }
509            else
510            {
511                print "md5 is not present\n";
512                printf "   is %s, length is %d\n", $md5sum, length(md5sum);
513            }
514
515            rename($temporary_filename, $filename) || die "can not rename $temporary_filename to $filename";
516        }
517        else
518        {
519            die "failed to download $URL";
520        }
521    }
522}
523
524
525
526
527=head3 DownloadExtensions
528    This function is intended to be called during bootstrapping.  It extracts the set of extensions
529    that will be used later, when the installation sets are built.
530    The set of languages is taken from the WITH_LANG environment variable.
531=cut
532sub DownloadExtensions ()
533{
534    if (defined $ENV{'ENABLE_BUNDLED_DICTIONARIES'}
535         && $ENV{'ENABLE_BUNDLED_DICTIONARIES'} eq "YES")
536    {
537        my $full_file_name = Prepare();
538        my $languages = [ "en_US" ];
539        if (defined $ENV{'WITH_LANG'})
540        {
541            @$languages = split(/\s+/, $ENV{'WITH_LANG'});
542            foreach my $l (@$languages)
543            {
544                print "$l\n";
545            }
546        }
547        my @urls = ParseExtensionsLst($full_file_name, $languages);
548        Download(@urls);
549    }
550    else
551    {
552        print "bundling of dictionaries is disabled.\n";
553    }
554}
555
556
557
558
559=head3 GetExtensionList
560    This function is intended to be called when installation sets are built.
561    It expects two arguments:
562        - A protocol selector.  Http URLs reference remotely located
563          extensions that will be bundled as-is into the installation
564          sets due to legal reasons. They are installed on first start
565          of the office.
566          File URLs reference extensions whose source code is part of
567          the repository.  They are pre-registered when installation
568          sets are created.  Their installation is finished when the
569          office is first started.
570        - A set of languages.  This set determines which extensions
571          are returned and then included in an installation set.
572=cut
573sub GetExtensionList ($@)
574{
575    my $protocol_selector = shift;
576    my @language_list = @_;
577
578    if (defined $ENV{'ENABLE_BUNDLED_DICTIONARIES'}
579         && $ENV{'ENABLE_BUNDLED_DICTIONARIES'} eq "YES")
580    {
581        my $full_file_name = Prepare();
582        my @urls = ParseExtensionsLst($full_file_name, \@language_list);
583
584        my @result = ();
585        for my $entry (@urls)
586        {
587            my ($protocol, $name, $URL, $md5sum) = @{$entry};
588            if ($protocol =~ /^$protocol_selector$/)
589            {
590                push @result, $name;
591            }
592        }
593
594        return @result;
595    }
596    else
597    {
598        # Bundling of dictionaires is disabled.
599    }
600
601    return ();
602}
603
604
6051;
606