1#!/usr/bin/perl
2
3#**************************************************************
4#
5#  Licensed to the Apache Software Foundation (ASF) under one
6#  or more contributor license agreements.  See the NOTICE file
7#  distributed with this work for additional information
8#  regarding copyright ownership.  The ASF licenses this file
9#  to you under the Apache License, Version 2.0 (the
10#  "License"); you may not use this file except in compliance
11#  with the License.  You may obtain a copy of the License at
12#
13#    http://www.apache.org/licenses/LICENSE-2.0
14#
15#  Unless required by applicable law or agreed to in writing,
16#  software distributed under the License is distributed on an
17#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18#  KIND, either express or implied.  See the License for the
19#  specific language governing permissions and limitations
20#  under the License.
21#
22#**************************************************************
23
24=head1 NAME
25
26    download_external_libraries.pl - Load missing tarballs specified in main/external_libs.lst.
27
28=head1 SYNOPSIS
29
30    For downloading external libraries (typically from the main/bootstrap script):
31
32    download_external_libraries(<data-file-name>);
33
34=head1 DESCRIPTION
35
36    The contents of the main/external_libs.lst file are used to determine the
37    external library tarballs that are missing from ext_sources/.
38
39    Individual libraries can be ignored depending on the values of environment variables.
40
41    Format of the main/external_libs.lst file:
42
43    The file is line based.
44    Comments start with a # and go to the end of the line and are ignored.
45    Lines that are empty or contain only spaces and/or comments are ignored.
46
47    All other lines can have one of two forms:
48    - A variable definition of the form <name>=<value>.
49    - A conditional block start in the form "if (<expression>)"
50
51    Variables defined in a conditional block are only visible in this block and
52    replace the definition of global variables and variables earlier in the same
53    block.
54    Some variables have special names:
55    - MD5 is the expected MD5 checksum of the library tarball.
56    - SHA1 is the expected SHA1 checksum of the library tarball.
57    - URL1 to URL9 specify from where to download the tarball.  The urls are tried in order.
58      The first successful download (download completed and checksum match) stops the iteration.
59
60    Expressions are explained below in the comment of EvaluateExpression().
61
62    A library is only regarded if its conditional expression evaluates to 1.
63
64    Example:
65
66    DefaultSite=http://some-internet-site.org
67    if ( true )
68        MD5 = 0123456789abcdef0123456789abcdef
69        name = library-1.0.tar.gz
70        URL1 = http://some-other-internet-site.org/another-name.tgz
71        URL2 = $(DefaultSite)$(MD5)-$(name)
72
73    This tries to load a library first from some-other-internet-site.org and if
74    that fails from some-internet-site.org.  The library is stored as $(MD5)-$(name)
75    even when it is loaded as another-name.tgz.
76
77=cut
78
79
80use strict;
81
82use File::Spec;
83use File::Path;
84use File::Basename;
85use Digest::MD5;
86use Digest::SHA;
87use URI;
88my $simple = 1;
89if ($simple)
90{
91    use LWP::Simple;
92}
93else
94{
95    use LWP::UserAgent;
96}
97
98my $Debug = 1;
99
100my $LocalEnvironment = undef;
101my $GlobalEnvironment = {};
102my @Missing = ();
103
104
105
106
107=head3 ProcessDataFile
108
109    Read the data file, typically named main/external_libs.lst, find the external
110    library tarballs that are not yet present in ext_sources/ and download them.
111
112=cut
113sub ProcessDataFile ($)
114{
115    my $filename = shift;
116
117    my $destination = $ENV{'TARFILE_LOCATION'};
118
119    die "can not open data file $filename" if ! -e $filename;
120
121    my $current_selector_value = 1;
122    my @URLHeads = ();
123    my @download_requests = ();
124
125    open my $in, $filename;
126    while (my $line = <$in>)
127    {
128        # Remove leading and trailing space and comments
129        $line =~ s/^\s+//;
130        $line =~ s/\s+$//;
131        $line =~ s/\s*#.*$//;
132
133        # Ignore empty lines.
134        next if $line eq "";
135
136        # An "if" statement starts a new block.
137        if ($line =~ /^\s*if\s*\(\s*(.*?)\s*\)\s*$/)
138        {
139            ProcessLastBlock();
140
141            $LocalEnvironment = { 'selector' => $1 };
142        }
143
144        # Lines of the form name = value define a local variable.
145        elsif ($line =~ /^\s*(\S+)\s*=\s*(.*?)\s*$/)
146        {
147            if (defined $LocalEnvironment)
148            {
149                $LocalEnvironment->{$1} = $2;
150            }
151            else
152            {
153                $GlobalEnvironment->{$1} = $2;
154            }
155        }
156        else
157        {
158            die "can not parse line $line\n";
159        }
160    }
161
162    ProcessLastBlock();
163
164    Download(\@download_requests, \@URLHeads);
165}
166
167
168
169
170=head3 ProcessLastBlock
171
172    Process the last definition of an external library.
173    If there is not last block, true for the first "if" statement, then the call is ignored.
174
175=cut
176sub ProcessLastBlock ()
177{
178    # Return if no block is defined.
179    return if ! defined $LocalEnvironment;
180
181    # Ignore the block if the selector does not match.
182    if ( ! EvaluateExpression(SubstituteVariables($LocalEnvironment->{'selector'})))
183    {
184        printf("ignoring %s because its prerequisites are not fulfilled\n", GetValue('name'));
185    }
186    else
187    {
188        my $name = GetValue('name');
189        my $checksum = GetChecksum();
190
191        if ( ! IsPresent($name, $checksum))
192        {
193            AddDownloadRequest($name, $checksum);
194        }
195    }
196}
197
198
199
200
201=head3 AddDownloadRequest($name, $checksum)
202
203    Add a request for downloading the library $name to @Missing.
204    Collect all available URL[1-9] variables as source URLs.
205
206=cut
207sub AddDownloadRequest ($$)
208{
209    my ($name, $checksum) = @_;
210
211    print "adding download request for $name\n";
212
213    my $urls = [];
214    my $url = GetValue('URL');
215    push @$urls, SubstituteVariables($url) if (defined $url);
216    for (my $i=1; $i<10; ++$i)
217    {
218        $url = GetValue('URL'.$i);
219        next if ! defined $url;
220        push @$urls, SubstituteVariables($url);
221    }
222
223    push @Missing, [$name, $checksum, $urls];
224}
225
226
227
228
229=head3 GetChecksum()
230
231    When either MD5 or SHA1 are variables in the current scope then return
232    a reference to a hash with two entries:
233        'type' is either 'MD5' or 'SHA1', the type or algorithm of the checksum,
234        'value' is the actual checksum
235    Otherwise undef is returned.
236
237=cut
238sub GetChecksum()
239{
240    my $checksum = GetValue("MD5");
241    if (defined $checksum && $checksum ne "")
242    {
243        return { 'type' => 'MD5', 'value' => $checksum };
244    }
245    elsif (defined ($checksum=GetValue("SHA1")) && $checksum ne "")
246    {
247        return { 'type' => 'SHA1', 'value' => $checksum };
248    }
249    else
250    {
251        return undef;
252    }
253}
254
255
256
257
258=head3 GetValue($variable_name)
259
260    Return the value of the variable with name $variable_name from the local
261    environment or, if not defined there, the global environment.
262
263=cut
264sub GetValue ($)
265{
266    my $variable_name = shift;
267
268    my $candidate = $LocalEnvironment->{$variable_name};
269    return $candidate if defined $candidate;
270
271    return $GlobalEnvironment->{$variable_name};
272}
273
274
275
276=head3 SubstituteVariables($text)
277
278    Replace all references to variables in $text with the respective variable values.
279    This is done repeatedly until no variable reference remains.
280
281=cut
282sub SubstituteVariables ($)
283{
284    my $text = shift;
285
286    my $infinite_recursion_guard = 100;
287    while ($text =~ /^(.*?)\$\(([^)]+)\)(.*)$/)
288    {
289        my ($head,$name,$tail) = ($1,$2,$3);
290        my $value = GetValue($name);
291        die "can not evaluate variable $name" if ! defined $value;
292        $text = $head.$value.$tail;
293
294        die "(probably) detected an infinite recursion in variable definitions" if --$infinite_recursion_guard<=0;
295    }
296
297    return $text;
298}
299
300
301
302
303=head3 EvaluateExpression($expression)
304
305    Evaluate the $expression of an "if" statement to either 0 or 1.  It can
306    be a single term (see EvaluateTerm for a description), or several terms
307    separated by either all ||s or &&s.  A term can also be an expression
308    enclosed in parantheses.
309
310=cut
311sub EvaluateExpression ($)
312{
313    my $expression = shift;
314
315    # Evaluate sub expressions enclosed in parantheses.
316    while ($expression =~ /^(.*)\(([^\(\)]+)\)(.*)$/)
317    {
318        $expression = $1 . (EvaluateExpression($2) ? " true " : " false ") . $3;
319    }
320
321    if ($expression =~ /&&/ && $expression =~ /\|\|/)
322    {
323        die "expression can contain either && or || but not both at the same time";
324    }
325    elsif ($expression =~ /&&/)
326    {
327        foreach my $term (split (/\s*&&\s*/,$expression))
328        {
329            return 0 if ! EvaluateTerm($term);
330        }
331        return 1;
332    }
333    elsif ($expression =~ /\|\|/)
334    {
335        foreach my $term (split (/\s*\|\|\s*/,$expression))
336        {
337            return 1 if EvaluateTerm($term);
338        }
339        return 0;
340    }
341    else
342    {
343        return EvaluateTerm($expression);
344    }
345}
346
347
348
349
350=head3 EvaluateTerm($term)
351
352    Evaluate the $term to either 0 or 1.
353    A term is either the literal "true", which evaluates to 1, or an expression
354    of the form NAME=VALUE or NAME!=VALUE.  NAME is the name of an environment
355    variable and VALUE any string.  VALUE may be empty.
356
357=cut
358sub EvaluateTerm ($)
359{
360    my $term = shift;
361
362    if ($term =~ /^\s*([a-zA-Z_0-9]+)\s*(==|!=)\s*(.*)\s*$/)
363    {
364        my ($variable_name, $operator, $given_value) = ($1,$2,$3);
365        my $variable_value = $ENV{$variable_name};
366        $variable_value = "" if ! defined $variable_value;
367
368        if ($operator eq "==")
369        {
370            return $variable_value eq $given_value;
371        }
372        elsif ($operator eq "!=")
373        {
374            return $variable_value ne $given_value;
375        }
376        else
377        {
378            die "unknown operator in term $term";
379        }
380    }
381    elsif ($term =~ /^\s*true\s*$/i)
382    {
383        return 1;
384    }
385    elsif ($term =~ /^\s*false\s*$/i)
386    {
387        return 0;
388    }
389    else
390    {
391        die "term $term is not of the form <environment-variable> (=|==) <value>";
392    }
393}
394
395
396
397
398=head IsPresent($name, $given_checksum)
399
400    Check if an external library tar ball with the basename $name already
401    exists in the target directory TARFILE_LOCATION.  The basename is
402    prefixed with the MD5 or SHA1 checksum.
403    If the file exists then its checksum is compared to the given one.
404
405=cut
406sub IsPresent ($$)
407{
408    my ($name, $given_checksum) = @_;
409
410    my $filename = File::Spec->catfile($ENV{'TARFILE_LOCATION'}, $given_checksum->{'value'}."-".$name);
411    return 0 unless -f $filename;
412
413    # File exists.  Check if its checksum is correct.
414    my $checksum;
415    if ( ! defined $given_checksum)
416    {
417        print "no checksum given, can not verify\n";
418        return 1;
419    }
420    elsif ($given_checksum->{'type'} eq "MD5")
421    {
422        my $md5 = Digest::MD5->new();
423        open my $in, $filename;
424        $md5->addfile($in);
425        $checksum = $md5->hexdigest();
426    }
427    elsif ($given_checksum->{'type'} eq "SHA1")
428    {
429        my $sha1 = Digest::SHA->new("1");
430        open my $in, $filename;
431        $sha1->addfile($in);
432        $checksum = $sha1->hexdigest();
433    }
434    else
435    {
436        die "unsupported checksum type (not MD5 or SHA1)";
437    }
438
439    if ($given_checksum->{'value'} ne $checksum)
440    {
441        # Checksum does not match.  Delete the file.
442        print "$name exists, but checksum does not match => deleting\n";
443        unlink($filename);
444        return 0;
445    }
446    else
447    {
448        printf("%s exists, %s checksum is OK\n", $name, $given_checksum->{'type'});
449        return 1;
450    }
451}
452
453
454
455
456=head3 Download
457
458    Download a set of files specified by @Missing.
459
460    For http URLs there may be an optional checksum.  If it is present then downloaded
461    files that do not match that checksum lead to abortion of the current process.
462    Files that have already been downloaded are not downloaded again.
463
464=cut
465sub Download ()
466{
467    my $download_path = $ENV{'TARFILE_LOCATION'};
468
469    if (scalar @Missing > 0)
470    {
471        printf("downloading %d missing tar ball%s to %s\n",
472               scalar @Missing, scalar @Missing>0 ? "s" : "",
473               $download_path);
474    }
475    else
476    {
477        print "all external libraries present\n";
478        return;
479    }
480
481    # Download the missing files.
482    for my $item (@Missing)
483    {
484        my ($name, $checksum, $urls) = @$item;
485
486        foreach my $url (@$urls)
487        {
488            last if DownloadFile(
489                defined $checksum
490                    ? $checksum->{'value'}."-".$name
491                    : $name,
492                $url,
493                $checksum);
494        }
495    }
496}
497
498
499
500
501=head3 DownloadFile($name,$URL,$checksum)
502
503    Download a single external library tarball.  It origin is given by $URL.
504    Its destination is $(TARFILE_LOCATION)/$checksum-$name.
505
506=cut
507sub DownloadFile ($$$)
508{
509    my $name = shift;
510    my $URL = shift;
511    my $checksum = shift;
512
513    my $filename = File::Spec->catfile($ENV{'TARFILE_LOCATION'}, $name);
514
515    my $temporary_filename = $filename . ".part";
516
517    print "downloading to $temporary_filename\n";
518    my $out;
519    open $out, ">$temporary_filename";
520    binmode($out);
521
522    # Prepare checksum
523    my $digest;
524    if (defined $checksum && $checksum->{'type'} eq "SHA1")
525    {
526        # Use SHA1 only when explicitly requested (by the presence of a "SHA1=..." line.)
527        $digest = Digest::SHA->new("1");
528    }
529    elsif ( ! defined $checksum || $checksum->{'type'} eq "MD5")
530    {
531        # Use MD5 when explicitly requested or when no checksum type is given.
532        $digest = Digest::MD5->new();
533    }
534    else
535    {
536        die "checksum type ".$checksum->{'type'}." is not supported";
537    }
538
539    # Download the extension.
540    my $success = 0;
541    if ($simple)
542    {
543	my $content = LWP::Simple::get($URL);
544	$success = defined $content;
545	if ($success)
546	{
547	    open $out, ">$temporary_filename";
548	    binmode($out);
549	    print $out $content;
550	    close($out);
551	    $digest->add($content);
552	}
553	else
554	{
555	    print "download from $URL failed\n";
556	}
557    }
558    else
559    {
560	my $agent = LWP::UserAgent->new();
561	$agent->timeout(120);
562	$agent->env_proxy;
563	$agent->show_progress(1);
564	my $last_was_redirect = 0;
565	$agent->add_handler('response_redirect'
566			    => sub{
567				$last_was_redirect = 1;
568				return;
569			    });
570	$agent->add_handler('response_data'
571			    => sub{
572				if ($last_was_redirect)
573				{
574				    $last_was_redirect = 0;
575				    # Throw away the data we got so far.
576				    $digest->reset();
577				    close $out;
578				    open $out, ">$temporary_filename";
579				    binmode($out);
580				}
581				my($response,$agent,$h,$data)=@_;
582				print $out $data;
583				$digest->add($data);
584			    });
585
586	$success = $agent->get($URL)->is_success();
587	close $out;
588    }
589
590    # When download was successfull then check the checksum and rename the .part file
591    # into the actual extension name.
592    if ($success)
593    {
594        my $file_checksum = $digest->hexdigest();
595        if (defined $checksum)
596        {
597            if ($checksum->{'value'} eq $file_checksum)
598            {
599                printf("%s checksum is OK\n", $checksum->{'type'});
600            }
601            else
602            {
603                unlink($temporary_filename);
604                printf("    %s checksum does not match (%s instead of %s)\n",
605                       $file_checksum,
606                       $checksum->{'value'},
607                       $checksum->{'type'});
608                return 0;
609            }
610        }
611        else
612        {
613            # The datafile does not contain a checksum to match against.
614            # Display the one that was calculated for the downloaded file so that
615            # it can be integrated manually into the data file.
616            printf("checksum not given, md5 of file is %s\n", $file_checksum);
617            $filename = File::Spec->catfile($ENV{'TARFILE_LOCATION'}, $file_checksum . "-" . $name);
618        }
619
620        rename($temporary_filename, $filename) || die "can not rename $temporary_filename to $filename";
621        return 1;
622    }
623    else
624    {
625        unlink($temporary_filename);
626        print "    download failed\n";
627        return 0;
628    }
629}
630
631
632
633
634=head3 CheckDownloadDestination ()
635
636    Make sure that the download destination $TARFILE_LOCATION does exist.  If
637    not, then the directory is created.
638
639=cut
640sub CheckDownloadDestination ()
641{
642    my $destination = $ENV{'TARFILE_LOCATION'};
643    die "ERROR: no destination defined! please set TARFILE_LOCATION!" if ($destination eq "");
644
645    if ( ! -d $destination)
646    {
647        File::Path::make_path($destination);
648        die "ERROR: can't create \$TARFILE_LOCATION" if  ! -d $destination;
649    }
650}
651
652
653
654
655=head3 ProvideSpecialTarball ($url,$name,$name_converter)
656
657    A few tarballs need special handling.  That is done here.
658
659=cut
660sub ProvideSpecialTarball ($$$)
661{
662    my $url = shift;
663    my $name = shift;
664    my $name_converter = shift;
665
666    return unless defined $url && $url ne "";
667
668    # See if we can find the executable.
669    my ($SOLARENV,$OUTPATH,$EXEEXT) =  ($ENV{'SOLARENV'},$ENV{'OUTPATH'},$ENV{'EXEEXT'});
670    $SOLARENV = "" unless defined $SOLARENV;
671    $OUTPATH = "" unless defined $OUTPATH;
672    $EXEEXT = "" unless defined $EXEEXT;
673    if (-x File::Spec->catfile($SOLARENV, $OUTPATH, "bin", $name.$EXEEXT))
674    {
675        print "found $name executable\n";
676        return;
677    }
678
679    # Download the source from the URL.
680    my $basename = basename(URI->new($url)->path());
681    die unless defined $basename;
682
683    if (defined $name_converter)
684    {
685        $basename = &{$name_converter}($basename);
686    }
687
688    # Has the source tar ball already been downloaded?
689    my @candidates = glob(File::Spec->catfile($ENV{'TARFILE_LOCATION'}, "*-" . $basename));
690    if (scalar @candidates > 0)
691    {
692        # Yes.
693        print "$basename exists\n";
694        return;
695    }
696    else
697    {
698        # No, download it.
699        print "downloading $basename\n";
700        DownloadFile($basename, $url, undef);
701    }
702}
703
704
705
706
707
708# The main() functionality.
709
710die "usage: $0 <data-file-name>" if scalar @ARGV != 1;
711my $data_file = $ARGV[0];
712CheckDownloadDestination();
713ProcessDataFile($data_file);
714ProvideSpecialTarball($ENV{'DMAKE_URL'}, "dmake", undef);
715ProvideSpecialTarball(
716    $ENV{'EPM_URL'},
717    "epm",
718    sub{$_[0]=~s/-source//; return $_[0]});
719