1#!/usr/bin/env perl
2
3#**************************************************************
4#
5#  Licensed to the Apache Software Foundation (ASF) under one
6#  or more contributor license agreements.  See the NOTICE file
7#  distributed with this work for additional information
8#  regarding copyright ownership.  The ASF licenses this file
9#  to you under the Apache License, Version 2.0 (the
10#  "License"); you may not use this file except in compliance
11#  with the License.  You may obtain a copy of the License at
12#
13#    http://www.apache.org/licenses/LICENSE-2.0
14#
15#  Unless required by applicable law or agreed to in writing,
16#  software distributed under the License is distributed on an
17#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18#  KIND, either express or implied.  See the License for the
19#  specific language governing permissions and limitations
20#  under the License.
21#
22#**************************************************************
23
24=head1 NAME
25
26    download_external_libraries.pl - Load missing tarballs specified in main/external_deps.lst.
27
28=head1 SYNOPSIS
29
30    For downloading external libraries (typically from the main/bootstrap script):
31
32    download_external_libraries(<data-file-name>);
33
34=head1 DESCRIPTION
35
36    The contents of the main/external_deps.lst file are used to determine the
37    external library tarballs that are missing from ext_sources/.
38
39    Individual libraries can be ignored depending on the values of environment variables.
40
41    Format of the main/external_deps.lst file:
42
43    The file is line based.
44    Comments start with a # and go to the end of the line and are ignored.
45    Lines that are empty or contain only spaces and/or comments are ignored.
46
47    All other lines can have one of two forms:
48    - A variable definition of the form <name>=<value>.
49    - A conditional block start in the form "if (<expression>)"
50
51    Variables defined in a conditional block are only visible in this block and
52    replace the definition of global variables and variables earlier in the same
53    block.
54    Some variables have special names:
55    - MD5 is the expected MD5 checksum of the library tarball.
56    - SHA1 is the expected SHA1 checksum of the library tarball.
57    - URL1 to URL9 specify from where to download the tarball. The URLs are tried in order.
58      The first successful download (download completed and checksum match) stops the iteration.
59
60    Expressions are explained below in the comment of EvaluateExpression().
61
62    A library is only regarded if its conditional expression evaluates to 1.
63
64    Example:
65
66    DefaultSite=http://some-internet-site.org
67    if ( true )
68        MD5 = 0123456789abcdef0123456789abcdef
69        name = library-1.0.tar.gz
70        URL1 = http://some-other-internet-site.org/another-name.tgz
71        URL2 = $(DefaultSite)$(MD5)-$(name)
72
73    This tries to load a library first from some-other-internet-site.org and if
74    that fails from some-internet-site.org. The library is stored as $(MD5)-$(name)
75    even when it is loaded as another-name.tgz.
76
77=cut
78
79
80use strict;
81
82use File::Spec;
83use File::Path;
84use File::Basename;
85use Digest::MD5;
86use Digest::SHA;
87use URI;
88use LWP::UserAgent;
89
90my $Debug = 1;
91
92my $LocalEnvironment = undef;
93my $GlobalEnvironment = {};
94my @Missing = ();
95
96
97
98
99=head3 ProcessDataFile
100
101    Read the data file, typically named main/external_deps.lst, find the external
102    library tarballs that are not yet present in ext_sources/ and download them.
103
104=cut
105sub ProcessDataFile ($)
106{
107    my $filename = shift;
108
109    my $destination = $ENV{'TARFILE_LOCATION'};
110
111    die "can not open data file $filename" if ! -e $filename;
112
113    my $current_selector_value = 1;
114    my @URLHeads = ();
115    my @download_requests = ();
116
117    open my $in, $filename;
118    while (my $line = <$in>)
119    {
120        # Remove leading and trailing space and comments
121        $line =~ s/^\s+//;
122        $line =~ s/\s+$//;
123        $line =~ s/\s*#.*$//;
124
125        # Ignore empty lines.
126        next if $line eq "";
127
128        # An "if" statement starts a new block.
129        if ($line =~ /^\s*if\s*\(\s*(.*?)\s*\)\s*$/)
130        {
131            ProcessLastBlock();
132
133            $LocalEnvironment = { 'selector' => $1 };
134        }
135
136        # Lines of the form name = value define a local variable.
137        elsif ($line =~ /^\s*(\S+)\s*=\s*(.*?)\s*$/)
138        {
139            if (defined $LocalEnvironment)
140            {
141                $LocalEnvironment->{$1} = $2;
142            }
143            else
144            {
145                $GlobalEnvironment->{$1} = $2;
146            }
147        }
148        else
149        {
150            die "can not parse line $line\n";
151        }
152    }
153
154    ProcessLastBlock();
155
156    Download(\@download_requests, \@URLHeads);
157}
158
159
160
161
162=head3 ProcessLastBlock
163
164    Process the last definition of an external library.
165    If there is not last block, true for the first "if" statement, then the call is ignored.
166
167=cut
168sub ProcessLastBlock ()
169{
170    # Return if no block is defined.
171    return if ! defined $LocalEnvironment;
172
173    # Ignore the block if the selector does not match.
174    if ( ! EvaluateExpression(SubstituteVariables($LocalEnvironment->{'selector'})))
175    {
176        printf("ignoring %s because its prerequisites are not fulfilled\n", GetValue('name'));
177    }
178    else
179    {
180        my $name = GetValue('name');
181        my $checksum = GetChecksum();
182
183        if ( ! IsPresent($name, $checksum))
184        {
185            AddDownloadRequest($name, $checksum);
186        }
187    }
188}
189
190
191
192
193=head3 AddDownloadRequest($name, $checksum)
194
195    Add a request for downloading the library $name to @Missing.
196    Collect all available URL[1-9] variables as source URLs.
197
198=cut
199sub AddDownloadRequest ($$)
200{
201    my ($name, $checksum) = @_;
202
203    print "adding download request for $name\n";
204
205    my $urls = [];
206    my $url = GetValue('URL');
207    push @$urls, SubstituteVariables($url) if (defined $url);
208    for (my $i=1; $i<10; ++$i)
209    {
210        $url = GetValue('URL'.$i);
211        next if ! defined $url;
212        push @$urls, SubstituteVariables($url);
213    }
214
215    push @Missing, [$name, $checksum, $urls];
216}
217
218
219
220
221=head3 GetChecksum()
222
223    When either MD5 or SHA1 are variables in the current scope then return
224    a reference to a hash with two entries:
225        'type' is either 'MD5' or 'SHA1', the type or algorithm of the checksum,
226        'value' is the actual checksum
227    Otherwise undef is returned.
228
229=cut
230sub GetChecksum()
231{
232    my $checksum = GetValue("MD5");
233    if (defined $checksum && $checksum ne "")
234    {
235        return { 'type' => 'MD5', 'value' => $checksum };
236    }
237    elsif (defined ($checksum=GetValue("SHA1")) && $checksum ne "")
238    {
239        return { 'type' => 'SHA1', 'value' => $checksum };
240    }
241    else
242    {
243        return undef;
244    }
245}
246
247
248
249
250=head3 GetValue($variable_name)
251
252    Return the value of the variable with name $variable_name from the local
253    environment or, if not defined there, the global environment.
254
255=cut
256sub GetValue ($)
257{
258    my $variable_name = shift;
259
260    my $candidate = $LocalEnvironment->{$variable_name};
261    return $candidate if defined $candidate;
262
263    return $GlobalEnvironment->{$variable_name};
264}
265
266
267
268=head3 SubstituteVariables($text)
269
270    Replace all references to variables in $text with the respective variable values.
271    This is done repeatedly until no variable reference remains.
272
273=cut
274sub SubstituteVariables ($)
275{
276    my $text = shift;
277
278    my $infinite_recursion_guard = 100;
279    while ($text =~ /^(.*?)\$\(([^)]+)\)(.*)$/)
280    {
281        my ($head,$name,$tail) = ($1,$2,$3);
282        my $value = GetValue($name);
283        die "can not evaluate variable $name" if ! defined $value;
284        $text = $head.$value.$tail;
285
286        die "(probably) detected an infinite recursion in variable definitions" if --$infinite_recursion_guard<=0;
287    }
288
289    return $text;
290}
291
292
293
294
295=head3 EvaluateExpression($expression)
296
297    Evaluate the $expression of an "if" statement to either 0 or 1. It can
298    be a single term (see EvaluateTerm for a description), or several terms
299    separated by either all ||s or &&s. A term can also be an expression
300    enclosed in parentheses.
301
302=cut
303sub EvaluateExpression ($)
304{
305    my $expression = shift;
306
307    # Evaluate sub expressions enclosed in parentheses.
308    while ($expression =~ /^(.*)\(([^\(\)]+)\)(.*)$/)
309    {
310        $expression = $1 . (EvaluateExpression($2) ? " true " : " false ") . $3;
311    }
312
313    if ($expression =~ /&&/ && $expression =~ /\|\|/)
314    {
315        die "expression can contain either && or || but not both at the same time";
316    }
317    elsif ($expression =~ /&&/)
318    {
319        foreach my $term (split (/\s*&&\s*/,$expression))
320        {
321            return 0 if ! EvaluateTerm($term);
322        }
323        return 1;
324    }
325    elsif ($expression =~ /\|\|/)
326    {
327        foreach my $term (split (/\s*\|\|\s*/,$expression))
328        {
329            return 1 if EvaluateTerm($term);
330        }
331        return 0;
332    }
333    else
334    {
335        return EvaluateTerm($expression);
336    }
337}
338
339
340
341
342=head3 EvaluateTerm($term)
343
344    Evaluate the $term to either 0 or 1.
345    A term is either the literal "true", which evaluates to 1, or an expression
346    of the form NAME=VALUE or NAME!=VALUE. NAME is the name of an environment
347    variable and VALUE any string. VALUE may be empty.
348
349=cut
350sub EvaluateTerm ($)
351{
352    my $term = shift;
353
354    if ($term =~ /^\s*([a-zA-Z_0-9]+)\s*(==|!=)\s*(.*)\s*$/)
355    {
356        my ($variable_name, $operator, $given_value) = ($1,$2,$3);
357        my $variable_value = $ENV{$variable_name};
358        $variable_value = "" if ! defined $variable_value;
359
360        if ($operator eq "==")
361        {
362            return $variable_value eq $given_value;
363        }
364        elsif ($operator eq "!=")
365        {
366            return $variable_value ne $given_value;
367        }
368        else
369        {
370            die "unknown operator in term $term";
371        }
372    }
373    elsif ($term =~ /^\s*true\s*$/i)
374    {
375        return 1;
376    }
377    elsif ($term =~ /^\s*false\s*$/i)
378    {
379        return 0;
380    }
381    else
382    {
383        die "term $term is not of the form <environment-variable> (=|==) <value>";
384    }
385}
386
387
388
389
390=head IsPresent($name, $given_checksum)
391
392    Check if an external library tar ball with the basename $name already
393    exists in the target directory TARFILE_LOCATION. The basename is
394    prefixed with the MD5 or SHA1 checksum.
395    If the file exists then its checksum is compared to the given one.
396
397=cut
398sub IsPresent ($$)
399{
400    my ($name, $given_checksum) = @_;
401
402    my $filename = File::Spec->catfile($ENV{'TARFILE_LOCATION'}, $given_checksum->{'value'}."-".$name);
403    return 0 unless -f $filename;
404
405    # File exists. Check if its checksum is correct.
406    my $checksum;
407    if ( ! defined $given_checksum)
408    {
409        print "no checksum given, can not verify\n";
410        return 1;
411    }
412    elsif ($given_checksum->{'type'} eq "MD5")
413    {
414        my $md5 = Digest::MD5->new();
415        open my $in, $filename;
416        $md5->addfile($in);
417        $checksum = $md5->hexdigest();
418    }
419    elsif ($given_checksum->{'type'} eq "SHA1")
420    {
421        my $sha1 = Digest::SHA->new("1");
422        open my $in, $filename;
423        $sha1->addfile($in);
424        $checksum = $sha1->hexdigest();
425    }
426    else
427    {
428        die "unsupported checksum type (not MD5 or SHA1)";
429    }
430
431    if ($given_checksum->{'value'} ne $checksum)
432    {
433        # Checksum does not match. Delete the file.
434        print "$name exists, but checksum does not match => deleting\n";
435        unlink($filename);
436        return 0;
437    }
438    else
439    {
440        printf("%s exists, %s checksum is OK\n", $name, $given_checksum->{'type'});
441        return 1;
442    }
443}
444
445
446
447
448=head3 Download
449
450    Download a set of files specified by @Missing.
451
452    For http URLs there may be an optional checksum. If it is present then downloaded
453    files that do not match that checksum lead to abortion of the current process.
454    Files that have already been downloaded are not downloaded again.
455
456=cut
457sub Download ()
458{
459    my $download_path = $ENV{'TARFILE_LOCATION'};
460
461    if (scalar @Missing > 0)
462    {
463        printf("downloading %d missing tar ball%s to %s\n",
464               scalar @Missing, scalar @Missing>0 ? "s" : "",
465               $download_path);
466    }
467    else
468    {
469        print "all external libraries present\n";
470        return;
471    }
472
473    # Download the missing files.
474    my $all_downloaded = 1;
475    for my $item (@Missing)
476    {
477        my ($name, $checksum, $urls) = @$item;
478
479        my $downloaded = 0;
480        foreach my $url (@$urls)
481        {
482            $downloaded = DownloadFile(
483                defined $checksum
484                    ? $checksum->{'value'}."-".$name
485                    : $name,
486                $url,
487                $checksum);
488            last if $downloaded
489        }
490        $all_downloaded &&= $downloaded;
491    }
492    die "some needed files could not be downloaded!" if !$all_downloaded;
493}
494
495
496
497
498=head3 DownloadFile($name,$URL,$checksum)
499
500    Download a single external library tarball. Its origin is given by $URL.
501    Its destination is $(TARFILE_LOCATION)/$checksum-$name.
502
503=cut
504sub DownloadFile ($$$)
505{
506    my $name = shift;
507    my $URL = shift;
508    my $checksum = shift;
509
510    my $filename = File::Spec->catfile($ENV{'TARFILE_LOCATION'}, $name);
511
512    my $temporary_filename = $filename . ".part";
513
514    print "downloading to $temporary_filename\n";
515    my $out;
516    open $out, ">$temporary_filename";
517    binmode($out);
518
519    # Prepare checksum
520    my $digest;
521    if (defined $checksum && $checksum->{'type'} eq "SHA1")
522    {
523        # Use SHA1 only when explicitly requested (by the presence of a "SHA1=..." line.)
524        $digest = Digest::SHA->new("1");
525    }
526    elsif ( ! defined $checksum || $checksum->{'type'} eq "MD5")
527    {
528        # Use MD5 when explicitly requested or when no checksum type is given.
529        $digest = Digest::MD5->new();
530    }
531    else
532    {
533        die "checksum type ".$checksum->{'type'}." is not supported";
534    }
535
536    # Download the extension.
537    my $success = 0;
538
539    my $agent = LWP::UserAgent->new();
540    $agent->env_proxy;
541    my $response = $agent->get($URL);
542
543    $success = $response->is_success;
544    if ($success)
545    {
546        my $content = $response->content;
547        open $out, ">$temporary_filename";
548        binmode($out);
549        print $out $content;
550        $digest->add($content);
551    }
552    else
553    {
554        print "download from $URL failed (" . $response->status_line . ")\n";
555    }
556    close($out);
557
558    # When download was successful then check the checksum and rename the .part file
559    # into the actual extension name.
560    if ($success)
561    {
562        my $file_checksum = $digest->hexdigest();
563        if (defined $checksum)
564        {
565            if ($checksum->{'value'} eq $file_checksum)
566            {
567                printf("%s checksum is OK\n", $checksum->{'type'});
568            }
569            else
570            {
571                unlink($temporary_filename);
572                printf("    %s checksum does not match (%s instead of %s)\n",
573                       $checksum->{'type'},
574                       $file_checksum,
575                       $checksum->{'value'});
576                return 0;
577            }
578        }
579        else
580        {
581            # The datafile does not contain a checksum to match against.
582            # Display the one that was calculated for the downloaded file so that
583            # it can be integrated manually into the data file.
584            printf("checksum not given, md5 of file is %s\n", $file_checksum);
585            $filename = File::Spec->catfile($ENV{'TARFILE_LOCATION'}, $file_checksum . "-" . $name);
586        }
587
588        rename($temporary_filename, $filename) || die "can not rename $temporary_filename to $filename";
589        return 1;
590    }
591    else
592    {
593        unlink($temporary_filename);
594        print "    download failed\n";
595        return 0;
596    }
597}
598
599
600
601
602=head3 CheckDownloadDestination ()
603
604    Make sure that the download destination $TARFILE_LOCATION does exist. If
605    not, then the directory is created.
606
607=cut
608sub CheckDownloadDestination ()
609{
610    my $destination = $ENV{'TARFILE_LOCATION'};
611    die "ERROR: no destination defined! please set TARFILE_LOCATION!" if ($destination eq "");
612
613    if ( ! -d $destination)
614    {
615        File::Path::make_path($destination);
616        die "ERROR: can't create \$TARFILE_LOCATION" if ! -d $destination;
617    }
618}
619
620
621
622
623=head3 ProvideSpecialTarball ($url,$name,$name_converter)
624
625    A few tarballs need special handling. That is done here.
626
627=cut
628sub ProvideSpecialTarball ($$$)
629{
630    my $url = shift;
631    my $name = shift;
632    my $name_converter = shift;
633
634    return unless defined $url && $url ne "";
635
636    # See if we can find the executable.
637    my ($SOLARENV,$OUTPATH,$EXEEXT) = ($ENV{'SOLARENV'},$ENV{'OUTPATH'},$ENV{'EXEEXT'});
638    $SOLARENV = "" unless defined $SOLARENV;
639    $OUTPATH = "" unless defined $OUTPATH;
640    $EXEEXT = "" unless defined $EXEEXT;
641    if (-x File::Spec->catfile($SOLARENV, $OUTPATH, "bin", $name.$EXEEXT))
642    {
643        print "found $name executable\n";
644        return;
645    }
646
647    # Download the source from the URL.
648    my $basename = basename(URI->new($url)->path());
649    die unless defined $basename;
650
651    if (defined $name_converter)
652    {
653        $basename = &{$name_converter}($basename);
654    }
655
656    # Has the source tar ball already been downloaded?
657    my @candidates = glob(File::Spec->catfile($ENV{'TARFILE_LOCATION'}, "*-" . $basename));
658    if (scalar @candidates > 0)
659    {
660        # Yes.
661        print "$basename exists\n";
662        return;
663    }
664    else
665    {
666        # No, download it.
667        print "downloading $basename\n";
668        DownloadFile($basename, $url, undef);
669    }
670}
671
672
673
674
675
676# The main() functionality.
677
678die "usage: $0 <data-file-name>" if scalar @ARGV != 1;
679my $data_file = $ARGV[0];
680CheckDownloadDestination();
681ProcessDataFile($data_file);
682ProvideSpecialTarball($ENV{'DMAKE_URL'}, "dmake", undef);
683ProvideSpecialTarball(
684    $ENV{'EPM_URL'},
685    "epm",
686    sub{$_[0]=~s/-source//; return $_[0]});
687