1#!/usr/bin/perl
2
3#**************************************************************
4#
5#  Licensed to the Apache Software Foundation (ASF) under one
6#  or more contributor license agreements.  See the NOTICE file
7#  distributed with this work for additional information
8#  regarding copyright ownership.  The ASF licenses this file
9#  to you under the Apache License, Version 2.0 (the
10#  "License"); you may not use this file except in compliance
11#  with the License.  You may obtain a copy of the License at
12#
13#    http://www.apache.org/licenses/LICENSE-2.0
14#
15#  Unless required by applicable law or agreed to in writing,
16#  software distributed under the License is distributed on an
17#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18#  KIND, either express or implied.  See the License for the
19#  specific language governing permissions and limitations
20#  under the License.
21#
22#**************************************************************
23
24=head1 NAME
25
26    download_external_libraries.pl - Load missing tarballs specified in main/external_libs.lst.
27
28=head1 SYNOPSIS
29
30    For downloading external libraries (typically from the main/bootstrap script):
31
32    download_external_libraries(<data-file-name>);
33
34=head1 DESCRIPTION
35
36    The contents of the main/external_libs.lst file are used to determine the
37    external library tarballs that are missing from ext_sources/.
38
39    Individual libraries can be ignored depending on the values of environment variables.
40
41    Format of the main/external_libs.lst file:
42
43    The file is line based.
44    Comments start with a # and go to the end of the line and are ignored.
45    Lines that are empty or contain only spaces and/or comments are ignored.
46
47    All other lines can have one of two forms:
48    - A variable definition of the form <name>=<value>.
49    - A conditional block start in the form "if (<expression>)"
50
51    Variables defined in a conditional block are only visible in this block and
52    replace the definition of global variables and variables earlier in the same
53    block.
54    Some variables have special names:
55    - MD5 is the expected MD5 checksum of the library tarball.
56    - SHA1 is the expected SHA1 checksum of the library tarball.
57    - URL1 to URL9 specify from where to download the tarball.  The urls are tried in order.
58      The first successful download (download completed and checksum match) stops the iteration.
59
60    Expressions are explained below in the comment of EvaluateExpression().
61
62    A library is only regarded if its conditional expression evaluates to 1.
63
64    Example:
65
66    DefaultSite=http://some-internet-site.org
67    if ( true )
68        MD5 = 0123456789abcdef0123456789abcdef
69        name = library-1.0.tar.gz
70        URL1 = http://some-other-internet-site.org/another-name.tgz
71        URL2 = $(DefaultSite)$(MD5)-$(name)
72
73    This tries to load a library first from some-other-internet-site.org and if
74    that fails from some-internet-site.org.  The library is stored as $(MD5)-$(name)
75    even when it is loaded as another-name.tgz.
76
77=cut
78
79
80use strict;
81
82use File::Spec;
83use File::Path;
84use File::Basename;
85use LWP::UserAgent;
86use Digest::MD5;
87use Digest::SHA;
88use URI;
89
90my $Debug = 1;
91
92my $LocalEnvironment = undef;
93my $GlobalEnvironment = {};
94my @Missing = ();
95
96
97
98
99=head3 ProcessDataFile
100
101    Read the data file, typically named main/external_libs.lst, find the external
102    library tarballs that are not yet present in ext_sources/ and download them.
103
104=cut
105sub ProcessDataFile ($)
106{
107    my $filename = shift;
108
109    my $destination = $ENV{'TARFILE_LOCATION'};
110
111    die "can not open data file $filename" if ! -e $filename;
112
113    my $current_selector_value = 1;
114    my @URLHeads = ();
115    my @download_requests = ();
116
117    open my $in, $filename;
118    while (my $line = <$in>)
119    {
120        # Remove leading and trailing space and comments
121        $line =~ s/^\s+//;
122        $line =~ s/\s+$//;
123        $line =~ s/\s*#.*$//;
124
125        # Ignore empty lines.
126        next if $line eq "";
127
128        # An "if" statement starts a new block.
129        if ($line =~ /^\s*if\s*\(\s*(.*?)\s*\)\s*$/)
130        {
131            ProcessLastBlock();
132
133            $LocalEnvironment = { 'selector' => $1 };
134        }
135
136        # Lines of the form name = value define a local variable.
137        elsif ($line =~ /^\s*(\S+)\s*=\s*(.*?)\s*$/)
138        {
139            if (defined $LocalEnvironment)
140            {
141                $LocalEnvironment->{$1} = $2;
142            }
143            else
144            {
145                $GlobalEnvironment->{$1} = $2;
146            }
147        }
148        else
149        {
150            die "can not parse line $line\n";
151        }
152    }
153
154    ProcessLastBlock();
155
156    Download(\@download_requests, \@URLHeads);
157}
158
159
160
161
162=head3 ProcessLastBlock
163
164    Process the last definition of an external library.
165    If there is not last block, true for the first "if" statement, then the call is ignored.
166
167=cut
168sub ProcessLastBlock ()
169{
170    # Return if no block is defined.
171    return if ! defined $LocalEnvironment;
172
173    # Ignore the block if the selector does not match.
174    if ( ! EvaluateExpression(SubstituteVariables($LocalEnvironment->{'selector'})))
175    {
176        printf("ignoring %s because its prerequisites are not fulfilled\n", GetValue('name'));
177    }
178    else
179    {
180        my $name = GetValue('name');
181        my $checksum = GetChecksum();
182
183        if ( ! IsPresent($name, $checksum))
184        {
185            AddDownloadRequest($name, $checksum);
186        }
187    }
188}
189
190
191
192
193=head3 AddDownloadRequest($name, $checksum)
194
195    Add a request for downloading the library $name to @Missing.
196    Collect all available URL[1-9] variables as source URLs.
197
198=cut
199sub AddDownloadRequest ($$)
200{
201    my ($name, $checksum) = @_;
202
203    print "adding download request for $name\n";
204
205    my $urls = [];
206    my $url = GetValue('URL');
207    push @$urls, SubstituteVariables($url) if (defined $url);
208    for (my $i=1; $i<10; ++$i)
209    {
210        $url = GetValue('URL'.$i);
211        next if ! defined $url;
212        push @$urls, SubstituteVariables($url);
213    }
214
215    push @Missing, [$name, $checksum, $urls];
216}
217
218
219
220
221=head3 GetChecksum()
222
223    When either MD5 or SHA1 are variables in the current scope then return
224    a reference to a hash with two entries:
225        'type' is either 'MD5' or 'SHA1', the type or algorithm of the checksum,
226        'value' is the actual checksum
227    Otherwise undef is returned.
228
229=cut
230sub GetChecksum()
231{
232    my $checksum = GetValue("MD5");
233    if (defined $checksum && $checksum ne "")
234    {
235        return { 'type' => 'MD5', 'value' => $checksum };
236    }
237    elsif (defined ($checksum=GetValue("SHA1")) && $checksum ne "")
238    {
239        return { 'type' => 'SHA1', 'value' => $checksum };
240    }
241    else
242    {
243        return undef;
244    }
245}
246
247
248
249
250=head3 GetValue($variable_name)
251
252    Return the value of the variable with name $variable_name from the local
253    environment or, if not defined there, the global environment.
254
255=cut
256sub GetValue ($)
257{
258    my $variable_name = shift;
259
260    my $candidate = $LocalEnvironment->{$variable_name};
261    return $candidate if defined $candidate;
262
263    return $GlobalEnvironment->{$variable_name};
264}
265
266
267
268=head3 SubstituteVariables($text)
269
270    Replace all references to variables in $text with the respective variable values.
271    This is done repeatedly until no variable reference remains.
272
273=cut
274sub SubstituteVariables ($)
275{
276    my $text = shift;
277
278    my $infinite_recursion_guard = 100;
279    while ($text =~ /^(.*?)\$\(([^)]+)\)(.*)$/)
280    {
281        my ($head,$name,$tail) = ($1,$2,$3);
282        my $value = GetValue($name);
283        die "can not evaluate variable $name" if ! defined $value;
284        $text = $head.$value.$tail;
285
286        die "(probably) detected an infinite recursion in variable definitions" if --$infinite_recursion_guard<=0;
287    }
288
289    return $text;
290}
291
292
293
294
295=head3 EvaluateExpression($expression)
296
297    Evaluate the $expression of an "if" statement to either 0 or 1.  It can
298    be a single term (see EvaluateTerm for a description), or several terms
299    separated by either all ||s or &&s.  A term can also be an expression
300    enclosed in parantheses.
301
302=cut
303sub EvaluateExpression ($)
304{
305    my $expression = shift;
306
307    # Evaluate sub expressions enclosed in parantheses.
308    while ($expression =~ /^(.*)\(([^\(\)]+)\)(.*)$/)
309    {
310        $expression = $1 . (EvaluateExpression($2) ? " true " : " false ") . $3;
311    }
312
313    if ($expression =~ /&&/ && $expression =~ /\|\|/)
314    {
315        die "expression can contain either && or || but not both at the same time";
316    }
317    elsif ($expression =~ /&&/)
318    {
319        foreach my $term (split (/\s*&&\s*/,$expression))
320        {
321            return 0 if ! EvaluateTerm($term);
322        }
323        return 1;
324    }
325    elsif ($expression =~ /\|\|/)
326    {
327        foreach my $term (split (/\s*\|\|\s*/,$expression))
328        {
329            return 1 if EvaluateTerm($term);
330        }
331        return 0;
332    }
333    else
334    {
335        return EvaluateTerm($expression);
336    }
337}
338
339
340
341
342=head3 EvaluateTerm($term)
343
344    Evaluate the $term to either 0 or 1.
345    A term is either the literal "true", which evaluates to 1, or an expression
346    of the form NAME=VALUE or NAME!=VALUE.  NAME is the name of an environment
347    variable and VALUE any string.  VALUE may be empty.
348
349=cut
350sub EvaluateTerm ($)
351{
352    my $term = shift;
353
354    if ($term =~ /^\s*([a-zA-Z_0-9]+)\s*(==|!=)\s*(.*)\s*$/)
355    {
356        my ($variable_name, $operator, $given_value) = ($1,$2,$3);
357        my $variable_value = $ENV{$variable_name};
358        $variable_value = "" if ! defined $variable_value;
359
360        if ($operator eq "==")
361        {
362            return $variable_value eq $given_value;
363        }
364        elsif ($operator eq "!=")
365        {
366            return $variable_value ne $given_value;
367        }
368        else
369        {
370            die "unknown operator in term $term";
371        }
372    }
373    elsif ($term =~ /^\s*true\s*$/i)
374    {
375        return 1;
376    }
377    elsif ($term =~ /^\s*false\s*$/i)
378    {
379        return 0;
380    }
381    else
382    {
383        die "term $term is not of the form <environment-variable> (=|==) <value>";
384    }
385}
386
387
388
389
390=head IsPresent($name, $given_checksum)
391
392    Check if an external library tar ball with the basename $name already
393    exists in the target directory TARFILE_LOCATION.  The basename is
394    prefixed with the MD5 or SHA1 checksum.
395    If the file exists then its checksum is compared to the given one.
396
397=cut
398sub IsPresent ($$)
399{
400    my ($name, $given_checksum) = @_;
401
402    my $filename = File::Spec->catfile($ENV{'TARFILE_LOCATION'}, $given_checksum->{'value'}."-".$name);
403    return 0 unless -f $filename;
404
405    # File exists.  Check if its checksum is correct.
406    my $checksum;
407    if ( ! defined $given_checksum)
408    {
409        print "no checksum given, can not verify\n";
410        return 1;
411    }
412    elsif ($given_checksum->{'type'} eq "MD5")
413    {
414        my $md5 = Digest::MD5->new();
415        open my $in, $filename;
416        $md5->addfile($in);
417        $checksum = $md5->hexdigest();
418    }
419    elsif ($given_checksum->{'type'} eq "SHA1")
420    {
421        my $sha1 = Digest::SHA->new("1");
422        open my $in, $filename;
423        $sha1->addfile($in);
424        $checksum = $sha1->hexdigest();
425    }
426    else
427    {
428        die "unsupported checksum type (not MD5 or SHA1)";
429    }
430
431    if ($given_checksum->{'value'} ne $checksum)
432    {
433        # Checksum does not match.  Delete the file.
434        print "$name exists, but checksum does not match => deleting\n";
435        unlink($filename);
436        return 0;
437    }
438    else
439    {
440        printf("%s exists, %s checksum is OK\n", $name, $given_checksum->{'type'});
441        return 1;
442    }
443}
444
445
446
447
448=head3 Download
449
450    Download a set of files specified by @Missing.
451
452    For http URLs there may be an optional checksum.  If it is present then downloaded
453    files that do not match that checksum lead to abortion of the current process.
454    Files that have already been downloaded are not downloaded again.
455
456=cut
457sub Download ()
458{
459    my $download_path = $ENV{'TARFILE_LOCATION'};
460
461    if (scalar @Missing > 0)
462    {
463        printf("downloading %d missing tar ball%s to %s\n",
464               scalar @Missing, scalar @Missing>0 ? "s" : "",
465               $download_path);
466    }
467    else
468    {
469        print "all external libraries present\n";
470        return;
471    }
472
473    # Download the missing files.
474    for my $item (@Missing)
475    {
476        my ($name, $checksum, $urls) = @$item;
477
478        foreach my $url (@$urls)
479        {
480            last if DownloadFile(
481                defined $checksum
482                    ? $checksum->{'value'}."-".$name
483                    : $name,
484                $url,
485                $checksum);
486        }
487    }
488}
489
490
491
492
493=head3 DownloadFile($name,$URL,$checksum)
494
495    Download a single external library tarball.  It origin is given by $URL.
496    Its destination is $(TARFILE_LOCATION)/$checksum-$name.
497
498=cut
499sub DownloadFile ($$$)
500{
501    my $name = shift;
502    my $URL = shift;
503    my $checksum = shift;
504
505    my $filename = File::Spec->catfile($ENV{'TARFILE_LOCATION'}, $name);
506
507    my $temporary_filename = $filename . ".part";
508
509    print "downloading to $temporary_filename\n";
510    my $out;
511    open $out, ">$temporary_filename";
512    binmode($out);
513
514    # Prepare checksum
515    my $digest;
516    if (defined $checksum && $checksum->{'type'} eq "SHA1")
517    {
518        # Use SHA1 only when explicitly requested (by the presence of a "SHA1=..." line.)
519        $digest = Digest::SHA->new("1");
520    }
521    elsif ( ! defined $checksum || $checksum->{'type'} eq "MD5")
522    {
523        # Use MD5 when explicitly requested or when no checksum type is given.
524        $digest = Digest::MD5->new();
525    }
526    else
527    {
528        die "checksum type ".$checksum->{'type'}." is not supported";
529    }
530
531    # Download the extension.
532    my $agent = LWP::UserAgent->new();
533    $agent->timeout(120);
534    $agent->env_proxy;
535    $agent->show_progress(1);
536    my $last_was_redirect = 0;
537    $agent->add_handler('response_redirect'
538                        => sub{
539                            $last_was_redirect = 1;
540                            return;
541                        });
542    $agent->add_handler('response_data'
543                        => sub{
544                            if ($last_was_redirect)
545                            {
546                                $last_was_redirect = 0;
547                                # Throw away the data we got so far.
548                                $digest->reset();
549                                close $out;
550                                open $out, ">$temporary_filename";
551                                binmode($out);
552                            }
553                            my($response,$agent,$h,$data)=@_;
554                            print $out $data;
555                            $digest->add($data);
556                        });
557
558    my $response = $agent->get($URL);
559    close $out;
560
561    # When download was successfull then check the checksum and rename the .part file
562    # into the actual extension name.
563    if ($response->is_success())
564    {
565        my $file_checksum = $digest->hexdigest();
566        if (defined $checksum)
567        {
568            if ($checksum->{'value'} eq $file_checksum)
569            {
570                printf("%s checksum is OK\n", $checksum->{'type'});
571            }
572            else
573            {
574                unlink($temporary_filename);
575                printf("    %s checksum does not match (%s instead of %s)\n",
576                       $file_checksum,
577                       $checksum->{'value'},
578                       $checksum->{'type'});
579                return 0;
580            }
581        }
582        else
583        {
584            # The datafile does not contain a checksum to match against.
585            # Display the one that was calculated for the downloaded file so that
586            # it can be integrated manually into the data file.
587            printf("checksum not given, md5 of file is %s\n", $file_checksum);
588            $filename = File::Spec->catfile($ENV{'TARFILE_LOCATION'}, $file_checksum . "-" . $name);
589        }
590
591        rename($temporary_filename, $filename) || die "can not rename $temporary_filename to $filename";
592        return 1;
593    }
594    else
595    {
596        unlink($temporary_filename);
597        print "    download failed\n";
598        return 0;
599    }
600}
601
602
603
604
605=head3 CheckDownloadDestination ()
606
607    Make sure that the download destination $TARFILE_LOCATION does exist.  If
608    not, then the directory is created.
609
610=cut
611sub CheckDownloadDestination ()
612{
613    my $destination = $ENV{'TARFILE_LOCATION'};
614    die "ERROR: no destination defined! please set TARFILE_LOCATION!" if ($destination eq "");
615
616    if ( ! -d $destination)
617    {
618        File::Path::make_path($destination);
619        die "ERROR: can't create \$TARFILE_LOCATION" if  ! -d $destination;
620    }
621}
622
623
624
625
626=head3 ProvideSpecialTarball ($url,$name,$name_converter)
627
628    A few tarballs need special handling.  That is done here.
629
630=cut
631sub ProvideSpecialTarball ($$$)
632{
633    my $url = shift;
634    my $name = shift;
635    my $name_converter = shift;
636
637    return unless defined $url && $url ne "";
638
639    # See if we can find the executable.
640    my ($SOLARENV,$OUTPATH,$EXEEXT) =  ($ENV{'SOLARENV'},$ENV{'OUTPATH'},$ENV{'EXEEXT'});
641    $SOLARENV = "" unless defined $SOLARENV;
642    $OUTPATH = "" unless defined $OUTPATH;
643    $EXEEXT = "" unless defined $EXEEXT;
644    if (-x File::Spec->catfile($SOLARENV, $OUTPATH, "bin", $name.$EXEEXT))
645    {
646        print "found $name executable\n";
647        return;
648    }
649
650    # Download the source from the URL.
651    my $basename = basename(URI->new($url)->path());
652    die unless defined $basename;
653
654    if (defined $name_converter)
655    {
656        $basename = &{$name_converter}($basename);
657    }
658
659    # Has the source tar ball already been downloaded?
660    my @candidates = glob(File::Spec->catfile($ENV{'TARFILE_LOCATION'}, "*-" . $basename));
661    if (scalar @candidates > 0)
662    {
663        # Yes.
664        print "$basename exists\n";
665        return;
666    }
667    else
668    {
669        # No, download it.
670        print "downloading $basename\n";
671        DownloadFile($basename, $url, undef);
672    }
673}
674
675
676
677
678
679# The main() functionality.
680
681die "usage: $0 <data-file-name>" if scalar @ARGV != 1;
682my $data_file = $ARGV[0];
683CheckDownloadDestination();
684ProcessDataFile($data_file);
685ProvideSpecialTarball($ENV{'DMAKE_URL'}, "dmake", undef);
686ProvideSpecialTarball(
687    $ENV{'EPM_URL'},
688    "epm",
689    sub{$_[0]=~s/-source//; return $_[0]});
690