1#!/usr/bin/perl
2
3=head1 NAME
4
5    download_external_libraries.pl - Load missing tarballs specified in main/external_libs.lst.
6
7=head1 SYNOPSIS
8
9    For downloading external libraries (typically from the main/bootstrap script):
10
11    download_external_libraries(<data-file-name>);
12
13=head1 DESCRIPTION
14
15    The contents of the main/external_libs.lst file are used to determine the
16    external library tarballs that are missing from ext_sources/.
17
18    Individual libraries can be ignored depending on the values of environment variables.
19
20    Format of the main/external_libs.lst file:
21
22    The file is line based.
23    Comments start with a # and go to the end of the line and are ignored.
24    Lines that are empty or contain only spaces and/or comments are ignored.
25
26    All other lines can have one of two forms:
27    - A variable definition of the form <name>=<value>.
28    - A conditional block start in the form "if (<expression>)"
29
30    Variables defined in a conditional block are only visible in this block and
31    replace the definition of global variables and variables earlier in the same
32    block.
33    Some variables have special names:
34    - MD5 is the expected MD5 checksum of the library tarball.
35    - SHA1 is the expected SHA1 checksum of the library tarball.
36    - URL1 to URL9 specify from where to download the tarball.  The urls are tried in order.
37      The first successful download (download completed and checksum match) stops the iteration.
38
39    Expressions are explained below in the comment of EvaluateExpression().
40
41    A library is only regarded if its conditional expression evaluates to 1.
42
43    Example:
44
45    DefaultSite=http://some-internet-site.org
46    if ( true )
47        MD5 = 0123456789abcdef0123456789abcdef
48        name = library-1.0.tar.gz
49        URL1 = http://some-other-internet-site.org/another-name.tgz
50        URL2 = $(DefaultSite)$(MD5)-$(name)
51
52    This tries to load a library first from some-other-internet-site.org and if
53    that fails from some-internet-site.org.  The library is stored as $(MD5)-$(name)
54    even when it is loaded as another-name.tgz.
55
56=cut
57
58
59use strict;
60
61use File::Spec;
62use File::Path;
63use File::Basename;
64use LWP::UserAgent;
65use Digest::MD5;
66use Digest::SHA;
67use URI;
68
69my $Debug = 1;
70
71my $LocalEnvironment = undef;
72my $GlobalEnvironment = {};
73my @Missing = ();
74
75
76
77
78=head3 ProcessDataFile
79
80    Read the data file, typically named main/external_libs.lst, find the external
81    library tarballs that are not yet present in ext_sources/ and download them.
82
83=cut
84sub ProcessDataFile ($)
85{
86    my $filename = shift;
87
88    my $destination = $ENV{'TARFILE_LOCATION'};
89
90    die "can not open data file $filename" if ! -e $filename;
91
92    my $current_selector_value = 1;
93    my @URLHeads = ();
94    my @download_requests = ();
95
96    open my $in, $filename;
97    while (my $line = <$in>)
98    {
99        # Remove leading and trailing space and comments
100        $line =~ s/^\s+//;
101        $line =~ s/\s+$//;
102        $line =~ s/\s*#.*$//;
103
104        # Ignore empty lines.
105        next if $line eq "";
106
107        # An "if" statement starts a new block.
108        if ($line =~ /^\s*if\s*\(\s*(.*?)\s*\)\s*$/)
109        {
110            ProcessLastBlock();
111
112            $LocalEnvironment = { 'selector' => $1 };
113        }
114
115        # Lines of the form name = value define a local variable.
116        elsif ($line =~ /^\s*(\S+)\s*=\s*(.*?)\s*$/)
117        {
118            if (defined $LocalEnvironment)
119            {
120                $LocalEnvironment->{$1} = $2;
121            }
122            else
123            {
124                $GlobalEnvironment->{$1} = $2;
125            }
126        }
127        else
128        {
129            die "can not parse line $line\n";
130        }
131    }
132
133    ProcessLastBlock();
134
135    Download(\@download_requests, \@URLHeads);
136}
137
138
139
140
141=head3 ProcessLastBlock
142
143    Process the last definition of an external library.
144    If there is not last block, true for the first "if" statement, then the call is ignored.
145
146=cut
147sub ProcessLastBlock ()
148{
149    # Return if no block is defined.
150    return if ! defined $LocalEnvironment;
151
152    # Ignore the block if the selector does not match.
153    if ( ! EvaluateExpression(SubstituteVariables($LocalEnvironment->{'selector'})))
154    {
155        printf("ignoring %s because its prerequisites are not fulfilled\n", GetValue('name'));
156    }
157    else
158    {
159        my $name = GetValue('name');
160        my $checksum = GetChecksum();
161
162        if ( ! defined $checksum)
163        {
164            die "no checksum given for $name";
165        }
166        elsif ( ! IsPresent($name, $checksum))
167        {
168            AddDownloadRequest($name, $checksum);
169        }
170    }
171}
172
173
174
175
176=head3 AddDownloadRequest($name, $checksum)
177
178    Add a request for downloading the library $name to @Missing.
179    Collect all available URL[1-9] variables as source URLs.
180
181=cut
182sub AddDownloadRequest ($$)
183{
184    my ($name, $checksum) = @_;
185
186    print "adding download request for $name\n";
187
188    my $urls = [];
189    my $url = GetValue('URL');
190    push @$urls, SubstituteVariables($url) if (defined $url);
191    for (my $i=1; $i<10; ++$i)
192    {
193        $url = GetValue('URL'.$i);
194        next if ! defined $url;
195        push @$urls, SubstituteVariables($url);
196    }
197
198    push @Missing, [$name, $checksum, $urls];
199}
200
201
202
203
204=head3 GetChecksum()
205
206    When either MD5 or SHA1 are variables in the current scope then return
207    a reference to a hash with two entries:
208        'type' is either 'MD5' or 'SHA1', the type or algorithm of the checksum,
209        'value' is the actual checksum
210    Otherwise undef is returned.
211
212=cut
213sub GetChecksum()
214{
215    my $checksum = GetValue("MD5");
216    if (defined $checksum && $checksum ne "")
217    {
218        return { 'type' => 'MD5', 'value' => $checksum };
219    }
220    elsif (defined ($checksum=GetValue("SHA1")) && $checksum ne "")
221    {
222        return { 'type' => 'SHA1', 'value' => $checksum };
223    }
224    else
225    {
226        return undef;
227    }
228}
229
230
231
232
233=head3 GetValue($variable_name)
234
235    Return the value of the variable with name $variable_name from the local
236    environment or, if not defined there, the global environment.
237
238=cut
239sub GetValue ($)
240{
241    my $variable_name = shift;
242
243    my $candidate = $LocalEnvironment->{$variable_name};
244    return $candidate if defined $candidate;
245
246    return $GlobalEnvironment->{$variable_name};
247}
248
249
250
251=head3 SubstituteVariables($text)
252
253    Replace all references to variables in $text with the respective variable values.
254    This is done repeatedly until no variable reference remains.
255
256=cut
257sub SubstituteVariables ($)
258{
259    my $text = shift;
260
261    my $infinite_recursion_guard = 100;
262    while ($text =~ /^(.*?)\$\(([^)]+)\)(.*)$/)
263    {
264        my ($head,$name,$tail) = ($1,$2,$3);
265        my $value = GetValue($name);
266        die "can evaluate variable $name" if ! defined $value;
267        $text = $head.$value.$tail;
268
269        die "(probably) detected an infinite recursion in variable definitions" if --$infinite_recursion_guard<=0;
270    }
271
272    return $text;
273}
274
275
276
277
278=head3 EvaluateExpression($expression)
279
280    Evaluate the $expression of an "if" statement to either 0 or 1.  It can
281    be a single term (see EvaluateTerm for a description), or several terms
282    separated by either all ||s or &&s.  A term can also be an expression
283    enclosed in parantheses.
284
285=cut
286sub EvaluateExpression ($)
287{
288    my $expression = shift;
289
290    # Evaluate sub expressions enclosed in parantheses.
291    while ($expression =~ /^(.*)\(([^\(\)]+)\)(.*)$/)
292    {
293        $expression = $1 . (EvaluateExpression($2) ? " true " : " false ") . $3;
294    }
295
296    if ($expression =~ /&&/ && $expression =~ /\|\|/)
297    {
298        die "expression can contain either && or || but not both at the same time";
299    }
300    elsif ($expression =~ /&&/)
301    {
302        foreach my $term (split (/\s*&&\s*/,$expression))
303        {
304            return 0 if ! EvaluateTerm($term);
305        }
306        return 1;
307    }
308    elsif ($expression =~ /\|\|/)
309    {
310        foreach my $term (split (/\s*\|\|\s*/,$expression))
311        {
312            return 1 if EvaluateTerm($term);
313        }
314        return 0;
315    }
316    else
317    {
318        return EvaluateTerm($expression);
319    }
320}
321
322
323
324
325=head3 EvaluateTerm($term)
326
327    Evaluate the $term to either 0 or 1.
328    A term is either the literal "true", which evaluates to 1, or an expression
329    of the form NAME=VALUE or NAME!=VALUE.  NAME is the name of an environment
330    variable and VALUE any string.  VALUE may be empty.
331
332=cut
333sub EvaluateTerm ($)
334{
335    my $term = shift;
336
337    if ($term =~ /^\s*([a-zA-Z_0-9]+)\s*(==|!=)\s*(.*)\s*$/)
338    {
339        my ($variable_name, $operator, $given_value) = ($1,$2,$3);
340        my $variable_value = $ENV{$variable_name};
341        $variable_value = "" if ! defined $variable_value;
342
343        if ($operator eq "==")
344        {
345            return $variable_value eq $given_value;
346        }
347        elsif ($operator eq "!=")
348        {
349            return $variable_value ne $given_value;
350        }
351        else
352        {
353            die "unknown operator in term $term";
354        }
355    }
356    elsif ($term =~ /^\s*true\s*$/i)
357    {
358        return 1;
359    }
360    elsif ($term =~ /^\s*false\s*$/i)
361    {
362        return 0;
363    }
364    else
365    {
366        die "term $term is not of the form <environment-variable> (=|==) <value>";
367    }
368}
369
370
371
372
373=head IsPresent($name, $given_checksum)
374
375    Check if an external library tar ball with the basename $name already
376    exists in the target directory TARFILE_LOCATION.  The basename is
377    prefixed with the MD5 or SHA1 checksum.
378    If the file exists then its checksum is compared to the given one.
379
380=cut
381sub IsPresent ($$)
382{
383    my ($name, $given_checksum) = @_;
384
385    my $filename = File::Spec->catfile($ENV{'TARFILE_LOCATION'}, $given_checksum->{'value'}."-".$name);
386    return 0 unless -f $filename;
387
388    # File exists.  Check if its checksum is correct.
389    my $checksum;
390    if ($given_checksum->{'type'} eq "MD5")
391    {
392        my $md5 = Digest::MD5->new();
393        open my $in, $filename;
394        $md5->addfile($in);
395        $checksum = $md5->hexdigest();
396    }
397    elsif ($given_checksum->{'type'} eq "SHA1")
398    {
399        my $sha1 = Digest::SHA->new("1");
400        open my $in, $filename;
401        $sha1->addfile($in);
402        $checksum = $sha1->hexdigest();
403    }
404    else
405    {
406        die "unsupported checksum type (not MD5 or SHA1)";
407    }
408
409    if ($given_checksum->{'value'} ne $checksum)
410    {
411        # Checksum does not match.  Delete the file.
412        print "$name exists, but checksum does not match => deleting\n";
413        #unlink($filename);
414        return 0;
415    }
416    else
417    {
418        printf("%s exists, %s checksum is OK\n", $name, $given_checksum->{'type'});
419        return 1;
420    }
421}
422
423
424
425
426=head3 Download
427
428    Download a set of files specified by @Missing.
429
430    For http URLs there may be an optional checksum.  If it is present then downloaded
431    files that do not match that checksum lead to abortion of the current process.
432    Files that have already been downloaded are not downloaded again.
433
434=cut
435sub Download ()
436{
437    my $download_path = $ENV{'TARFILE_LOCATION'};
438
439    if (scalar @Missing > 0)
440    {
441        printf("downloading %d missing tar ball%s to %s\n",
442               scalar @Missing, scalar @Missing>0 ? "s" : "",
443               $download_path);
444    }
445    else
446    {
447        print "all external libraries present\n";
448        return;
449    }
450
451    # Download the missing files.
452    for my $item (@Missing)
453    {
454        my ($name, $checksum, $urls) = @$item;
455
456        foreach my $url (@$urls)
457        {
458            last if DownloadFile($checksum->{'value'}."-".$name, $url, $checksum);
459        }
460    }
461}
462
463
464
465
466=head3 DownloadFile($name,$URL,$checksum)
467
468    Download a single external library tarball.  It origin is given by $URL.
469    Its destination is $(TARFILE_LOCATION)/$checksum-$name.
470
471=cut
472sub DownloadFile ($$$)
473{
474    my $name = shift;
475    my $URL = shift;
476    my $checksum = shift;
477
478    my $filename = File::Spec->catfile($ENV{'TARFILE_LOCATION'}, $name);
479
480    my $temporary_filename = $filename . ".part";
481
482    print "downloading to $temporary_filename\n";
483    open my $out, ">$temporary_filename";
484    binmode($out);
485
486    # Prepare checksum
487    my $digest;
488    if (defined $checksum && $checksum->{'type'} eq "SHA1")
489    {
490        # Use SHA1 only when explicitly requested (by the presence of a "SHA1=..." line.)
491        $digest = Digest::SHA->new("1");
492    }
493    elsif ( ! defined $checksum || $checksum->{'type'} eq "MD5")
494    {
495        # Use MD5 when explicitly requested or when no checksum type is given.
496        $digest = Digest::MD5->new();
497    }
498    else
499    {
500        die "checksum type ".$checksum->{'type'}." is not supported";
501    }
502
503    # Download the extension.
504    my $agent = LWP::UserAgent->new();
505    $agent->timeout(120);
506    $agent->show_progress(1);
507    my $last_was_redirect = 0;
508    $agent->add_handler('response_redirect'
509                        => sub{
510                            $last_was_redirect = 1;
511                            return;
512                        });
513    $agent->add_handler('response_data'
514                        => sub{
515                            if ($last_was_redirect)
516                            {
517                                $last_was_redirect = 0;
518                                # Throw away the data we got so far.
519                                $checksum->reset();
520                                close $out;
521                                open $out, ">$temporary_filename";
522                                binmode($out);
523                            }
524                            my($response,$agent,$h,$data)=@_;
525                            print $out $data;
526                            $digest->add($data);
527                        });
528
529    my $response = $agent->get($URL);
530    close $out;
531
532    # When download was successfull then check the checksum and rename the .part file
533    # into the actual extension name.
534    if ($response->is_success())
535    {
536        my $file_checksum = $digest->hexdigest();
537        if (defined $checksum)
538        {
539            if ($checksum->{'value'} eq $file_checksum)
540            {
541                printf("%s checksum is OK\n", $checksum->{'type'});
542            }
543            else
544            {
545                unlink($temporary_filename);
546                printf("    %s checksum does not match (%s instead of %s)\n",
547                       $file_checksum,
548                       $checksum->{'value'},
549                       $checksum->{'type'});
550                return 0;
551            }
552        }
553        else
554        {
555            # The datafile does not contain a checksum to match against.
556            # Display the one that was calculated for the downloaded file so that
557            # it can be integrated manually into the data file.
558            printf("checksum not given, md5 of file is %s\n", $file_checksum);
559            $filename = File::Spec->catfile($ENV{'TARFILE_LOCATION'}, $file_checksum . "-" . $name);
560        }
561
562        rename($temporary_filename, $filename) || die "can not rename $temporary_filename to $filename";
563        return 1;
564    }
565    else
566    {
567        unlink($temporary_filename);
568        print "    download failed\n";
569        return 0;
570    }
571}
572
573
574
575
576=head3 CheckDownloadDestination ()
577
578    Make sure that the download destination $TARFILE_LOCATION does exist.  If
579    not, then the directory is created.
580
581=cut
582sub CheckDownloadDestination ()
583{
584    my $destination = $ENV{'TARFILE_LOCATION'};
585    die "ERROR: no destination defined! please set TARFILE_LOCATION!" if ($destination eq "");
586
587    if ( ! -d $destination)
588    {
589        File::Path::make_path($destination);
590        die "ERROR: can't create \$TARFILE_LOCATION" if  ! -d $destination;
591    }
592}
593
594
595
596
597=head3 ProvideSpecialTarball ($url,$name,$name_converter)
598
599    A few tarballs need special handling.  That is done here.
600
601=cut
602sub ProvideSpecialTarball ($$$)
603{
604    my $url = shift;
605    my $name = shift;
606    my $name_converter = shift;
607
608    return unless defined $url && $url ne "";
609
610    # See if we can find the executable.
611    my ($SOLARENV,$OUTPATH,$EXEEXT) =  ($ENV{'SOLARENV'},$ENV{'OUTPATH'},$ENV{'EXEEXT'});
612    $SOLARENV = "" unless defined $SOLARENV;
613    $OUTPATH = "" unless defined $OUTPATH;
614    $EXEEXT = "" unless defined $EXEEXT;
615    if (-x File::Spec->catfile($SOLARENV, $OUTPATH, "bin", $name.$EXEEXT))
616    {
617        print "found $name executable\n";
618        return;
619    }
620
621    # Download the source from the URL.
622    my $basename = basename(URI->new($url)->path());
623    die unless defined $basename;
624
625    if (defined $name_converter)
626    {
627        $basename = &{$name_converter}($basename);
628    }
629
630    # Has the source tar ball already been downloaded?
631    my @candidates = glob(File::Spec->catfile($ENV{'TARFILE_LOCATION'}, "*-" . $basename));
632    if (scalar @candidates > 0)
633    {
634        # Yes.
635        print "$basename exists\n";
636        return;
637    }
638    else
639    {
640        # No, download it.
641        print "downloading $basename\n";
642        DownloadFile($basename, $url, undef);
643    }
644}
645
646
647
648
649
650# The main() functionality.
651
652die "usage: $0 <data-file-name>" if scalar @ARGV != 1;
653my $data_file = $ARGV[0];
654CheckDownloadDestination();
655ProcessDataFile($data_file);
656ProvideSpecialTarball($ENV{'DMAKE_URL'}, "dmake", undef);
657ProvideSpecialTarball(
658    $ENV{'EPM_URL'},
659    "epm",
660    sub{$_[0]=~s/-source//; return $_[0]});
661