1#!/usr/bin/perl
2
3=head1 NAME
4
5    download_external_libraries.pl - Load missing tarballs specified in main/external_libs.lst.
6
7=head1 SYNOPSIS
8
9    For downloading external libraries (typically from the main/bootstrap script):
10
11    download_external_libraries(<data-file-name>);
12
13=head1 DESCRIPTION
14
15    The contents of the main/external_libs.lst file are used to determine the
16    external library tarballs that are missing from ext_sources/.
17
18    Individual libraries can be ignored depending on the values of environment variables.
19
20    Format of the main/external_libs.lst file:
21
22    The file is line based.
23    Comments start with a # and go to the end of the line and are ignored.
24    Lines that are empty or contain only spaces and/or comments are ignored.
25
26    All other lines can have one of two forms:
27    - A variable definition of the form <name>=<value>.
28    - A conditional block start in the form "if (<expression>)"
29
30    Variables defined in a conditional block are only visible in this block and
31    replace the definition of global variables and variables earlier in the same
32    block.
33    Some variables have special names:
34    - MD5 is the expected MD5 checksum of the library tarball.
35    - SHA1 is the expected SHA1 checksum of the library tarball.
36    - URL1 to URL9 specify from where to download the tarball.  The urls are tried in order.
37      The first successful download (download completed and checksum match) stops the iteration.
38
39    Expressions are explained below in the comment of EvaluateExpression().
40
41    A library is only regarded if its conditional expression evaluates to 1.
42
43    Example:
44
45    DefaultSite=http://some-internet-site.org
46    if ( true )
47        MD5 = 0123456789abcdef0123456789abcdef
48        name = library-1.0.tar.gz
49        URL1 = http://some-other-internet-site.org/another-name.tgz
50        URL2 = $(DefaultSite)$(MD5)-$(name)
51
52    This tries to load a library first from some-other-internet-site.org and if
53    that fails from some-internet-site.org.  The library is stored as $(MD5)-$(name)
54    even when it is loaded as another-name.tgz.
55
56=cut
57
58
59use strict;
60
61use File::Spec;
62use File::Path;
63use File::Basename;
64use LWP::UserAgent;
65use Digest::MD5;
66use Digest::SHA;
67use URI;
68
69my $Debug = 1;
70
71my $LocalEnvironment = undef;
72my $GlobalEnvironment = {};
73my @Missing = ();
74
75
76
77
78=head3 ProcessDataFile
79
80    Read the data file, typically named main/external_libs.lst, find the external
81    library tarballs that are not yet present in ext_sources/ and download them.
82
83=cut
84sub ProcessDataFile ($)
85{
86    my $filename = shift;
87
88    my $destination = $ENV{'TARFILE_LOCATION'};
89
90    die "can not open data file $filename" if ! -e $filename;
91
92    my $current_selector_value = 1;
93    my @URLHeads = ();
94    my @download_requests = ();
95
96    open my $in, $filename;
97    while (my $line = <$in>)
98    {
99        # Remove leading and trailing space and comments
100        $line =~ s/^\s+//;
101        $line =~ s/\s+$//;
102        $line =~ s/\s*#.*$//;
103
104        # Ignore empty lines.
105        next if $line eq "";
106
107        # An "if" statement starts a new block.
108        if ($line =~ /^\s*if\s*\(\s*(.*?)\s*\)\s*$/)
109        {
110            ProcessLastBlock();
111
112            $LocalEnvironment = { 'selector' => $1 };
113        }
114
115        # Lines of the form name = value define a local variable.
116        elsif ($line =~ /^\s*(\S+)\s*=\s*(.*?)\s*$/)
117        {
118            if (defined $LocalEnvironment)
119            {
120                $LocalEnvironment->{$1} = $2;
121            }
122            else
123            {
124                $GlobalEnvironment->{$1} = $2;
125            }
126        }
127        else
128        {
129            die "can not parse line $line\n";
130        }
131    }
132
133    ProcessLastBlock();
134
135    Download(\@download_requests, \@URLHeads);
136}
137
138
139
140
141=head3 ProcessLastBlock
142
143    Process the last definition of an external library.
144    If there is not last block, true for the first "if" statement, then the call is ignored.
145
146=cut
147sub ProcessLastBlock ()
148{
149    # Return if no block is defined.
150    return if ! defined $LocalEnvironment;
151
152    # Ignore the block if the selector does not match.
153    if ( ! EvaluateExpression(SubstituteVariables($LocalEnvironment->{'selector'})))
154    {
155        printf("ignoring %s because its prerequisites are not fulfilled\n", GetValue('name'));
156    }
157    else
158    {
159        my $name = GetValue('name');
160        my $checksum = GetChecksum();
161
162        if ( ! IsPresent($name, $checksum))
163        {
164            AddDownloadRequest($name, $checksum);
165        }
166    }
167}
168
169
170
171
172=head3 AddDownloadRequest($name, $checksum)
173
174    Add a request for downloading the library $name to @Missing.
175    Collect all available URL[1-9] variables as source URLs.
176
177=cut
178sub AddDownloadRequest ($$)
179{
180    my ($name, $checksum) = @_;
181
182    print "adding download request for $name\n";
183
184    my $urls = [];
185    my $url = GetValue('URL');
186    push @$urls, SubstituteVariables($url) if (defined $url);
187    for (my $i=1; $i<10; ++$i)
188    {
189        $url = GetValue('URL'.$i);
190        next if ! defined $url;
191        push @$urls, SubstituteVariables($url);
192    }
193
194    push @Missing, [$name, $checksum, $urls];
195}
196
197
198
199
200=head3 GetChecksum()
201
202    When either MD5 or SHA1 are variables in the current scope then return
203    a reference to a hash with two entries:
204        'type' is either 'MD5' or 'SHA1', the type or algorithm of the checksum,
205        'value' is the actual checksum
206    Otherwise undef is returned.
207
208=cut
209sub GetChecksum()
210{
211    my $checksum = GetValue("MD5");
212    if (defined $checksum && $checksum ne "")
213    {
214        return { 'type' => 'MD5', 'value' => $checksum };
215    }
216    elsif (defined ($checksum=GetValue("SHA1")) && $checksum ne "")
217    {
218        return { 'type' => 'SHA1', 'value' => $checksum };
219    }
220    else
221    {
222        return undef;
223    }
224}
225
226
227
228
229=head3 GetValue($variable_name)
230
231    Return the value of the variable with name $variable_name from the local
232    environment or, if not defined there, the global environment.
233
234=cut
235sub GetValue ($)
236{
237    my $variable_name = shift;
238
239    my $candidate = $LocalEnvironment->{$variable_name};
240    return $candidate if defined $candidate;
241
242    return $GlobalEnvironment->{$variable_name};
243}
244
245
246
247=head3 SubstituteVariables($text)
248
249    Replace all references to variables in $text with the respective variable values.
250    This is done repeatedly until no variable reference remains.
251
252=cut
253sub SubstituteVariables ($)
254{
255    my $text = shift;
256
257    my $infinite_recursion_guard = 100;
258    while ($text =~ /^(.*?)\$\(([^)]+)\)(.*)$/)
259    {
260        my ($head,$name,$tail) = ($1,$2,$3);
261        my $value = GetValue($name);
262        die "can not evaluate variable $name" if ! defined $value;
263        $text = $head.$value.$tail;
264
265        die "(probably) detected an infinite recursion in variable definitions" if --$infinite_recursion_guard<=0;
266    }
267
268    return $text;
269}
270
271
272
273
274=head3 EvaluateExpression($expression)
275
276    Evaluate the $expression of an "if" statement to either 0 or 1.  It can
277    be a single term (see EvaluateTerm for a description), or several terms
278    separated by either all ||s or &&s.  A term can also be an expression
279    enclosed in parantheses.
280
281=cut
282sub EvaluateExpression ($)
283{
284    my $expression = shift;
285
286    # Evaluate sub expressions enclosed in parantheses.
287    while ($expression =~ /^(.*)\(([^\(\)]+)\)(.*)$/)
288    {
289        $expression = $1 . (EvaluateExpression($2) ? " true " : " false ") . $3;
290    }
291
292    if ($expression =~ /&&/ && $expression =~ /\|\|/)
293    {
294        die "expression can contain either && or || but not both at the same time";
295    }
296    elsif ($expression =~ /&&/)
297    {
298        foreach my $term (split (/\s*&&\s*/,$expression))
299        {
300            return 0 if ! EvaluateTerm($term);
301        }
302        return 1;
303    }
304    elsif ($expression =~ /\|\|/)
305    {
306        foreach my $term (split (/\s*\|\|\s*/,$expression))
307        {
308            return 1 if EvaluateTerm($term);
309        }
310        return 0;
311    }
312    else
313    {
314        return EvaluateTerm($expression);
315    }
316}
317
318
319
320
321=head3 EvaluateTerm($term)
322
323    Evaluate the $term to either 0 or 1.
324    A term is either the literal "true", which evaluates to 1, or an expression
325    of the form NAME=VALUE or NAME!=VALUE.  NAME is the name of an environment
326    variable and VALUE any string.  VALUE may be empty.
327
328=cut
329sub EvaluateTerm ($)
330{
331    my $term = shift;
332
333    if ($term =~ /^\s*([a-zA-Z_0-9]+)\s*(==|!=)\s*(.*)\s*$/)
334    {
335        my ($variable_name, $operator, $given_value) = ($1,$2,$3);
336        my $variable_value = $ENV{$variable_name};
337        $variable_value = "" if ! defined $variable_value;
338
339        if ($operator eq "==")
340        {
341            return $variable_value eq $given_value;
342        }
343        elsif ($operator eq "!=")
344        {
345            return $variable_value ne $given_value;
346        }
347        else
348        {
349            die "unknown operator in term $term";
350        }
351    }
352    elsif ($term =~ /^\s*true\s*$/i)
353    {
354        return 1;
355    }
356    elsif ($term =~ /^\s*false\s*$/i)
357    {
358        return 0;
359    }
360    else
361    {
362        die "term $term is not of the form <environment-variable> (=|==) <value>";
363    }
364}
365
366
367
368
369=head IsPresent($name, $given_checksum)
370
371    Check if an external library tar ball with the basename $name already
372    exists in the target directory TARFILE_LOCATION.  The basename is
373    prefixed with the MD5 or SHA1 checksum.
374    If the file exists then its checksum is compared to the given one.
375
376=cut
377sub IsPresent ($$)
378{
379    my ($name, $given_checksum) = @_;
380
381    my $filename = File::Spec->catfile($ENV{'TARFILE_LOCATION'}, $given_checksum->{'value'}."-".$name);
382    return 0 unless -f $filename;
383
384    # File exists.  Check if its checksum is correct.
385    my $checksum;
386    if ( ! defined $given_checksum)
387    {
388        print "no checksum given, can not verify\n";
389        return 1;
390    }
391    elsif ($given_checksum->{'type'} eq "MD5")
392    {
393        my $md5 = Digest::MD5->new();
394        open my $in, $filename;
395        $md5->addfile($in);
396        $checksum = $md5->hexdigest();
397    }
398    elsif ($given_checksum->{'type'} eq "SHA1")
399    {
400        my $sha1 = Digest::SHA->new("1");
401        open my $in, $filename;
402        $sha1->addfile($in);
403        $checksum = $sha1->hexdigest();
404    }
405    else
406    {
407        die "unsupported checksum type (not MD5 or SHA1)";
408    }
409
410    if ($given_checksum->{'value'} ne $checksum)
411    {
412        # Checksum does not match.  Delete the file.
413        print "$name exists, but checksum does not match => deleting\n";
414        unlink($filename);
415        return 0;
416    }
417    else
418    {
419        printf("%s exists, %s checksum is OK\n", $name, $given_checksum->{'type'});
420        return 1;
421    }
422}
423
424
425
426
427=head3 Download
428
429    Download a set of files specified by @Missing.
430
431    For http URLs there may be an optional checksum.  If it is present then downloaded
432    files that do not match that checksum lead to abortion of the current process.
433    Files that have already been downloaded are not downloaded again.
434
435=cut
436sub Download ()
437{
438    my $download_path = $ENV{'TARFILE_LOCATION'};
439
440    if (scalar @Missing > 0)
441    {
442        printf("downloading %d missing tar ball%s to %s\n",
443               scalar @Missing, scalar @Missing>0 ? "s" : "",
444               $download_path);
445    }
446    else
447    {
448        print "all external libraries present\n";
449        return;
450    }
451
452    # Download the missing files.
453    for my $item (@Missing)
454    {
455        my ($name, $checksum, $urls) = @$item;
456
457        foreach my $url (@$urls)
458        {
459            last if DownloadFile(
460                defined $checksum
461                    ? $checksum->{'value'}."-".$name
462                    : $name,
463                $url,
464                $checksum);
465        }
466    }
467}
468
469
470
471
472=head3 DownloadFile($name,$URL,$checksum)
473
474    Download a single external library tarball.  It origin is given by $URL.
475    Its destination is $(TARFILE_LOCATION)/$checksum-$name.
476
477=cut
478sub DownloadFile ($$$)
479{
480    my $name = shift;
481    my $URL = shift;
482    my $checksum = shift;
483
484    my $filename = File::Spec->catfile($ENV{'TARFILE_LOCATION'}, $name);
485
486    my $temporary_filename = $filename . ".part";
487
488    print "downloading to $temporary_filename\n";
489    my $out;
490    open $out, ">$temporary_filename";
491    binmode($out);
492
493    # Prepare checksum
494    my $digest;
495    if (defined $checksum && $checksum->{'type'} eq "SHA1")
496    {
497        # Use SHA1 only when explicitly requested (by the presence of a "SHA1=..." line.)
498        $digest = Digest::SHA->new("1");
499    }
500    elsif ( ! defined $checksum || $checksum->{'type'} eq "MD5")
501    {
502        # Use MD5 when explicitly requested or when no checksum type is given.
503        $digest = Digest::MD5->new();
504    }
505    else
506    {
507        die "checksum type ".$checksum->{'type'}." is not supported";
508    }
509
510    # Download the extension.
511    my $agent = LWP::UserAgent->new();
512    $agent->timeout(120);
513    $agent->show_progress(1);
514    my $last_was_redirect = 0;
515    $agent->add_handler('response_redirect'
516                        => sub{
517                            $last_was_redirect = 1;
518                            return;
519                        });
520    $agent->add_handler('response_data'
521                        => sub{
522                            if ($last_was_redirect)
523                            {
524                                $last_was_redirect = 0;
525                                # Throw away the data we got so far.
526                                $digest->reset();
527                                close $out;
528                                open $out, ">$temporary_filename";
529                                binmode($out);
530                            }
531                            my($response,$agent,$h,$data)=@_;
532                            print $out $data;
533                            $digest->add($data);
534                        });
535
536    my $response = $agent->get($URL);
537    close $out;
538
539    # When download was successfull then check the checksum and rename the .part file
540    # into the actual extension name.
541    if ($response->is_success())
542    {
543        my $file_checksum = $digest->hexdigest();
544        if (defined $checksum)
545        {
546            if ($checksum->{'value'} eq $file_checksum)
547            {
548                printf("%s checksum is OK\n", $checksum->{'type'});
549            }
550            else
551            {
552                unlink($temporary_filename);
553                printf("    %s checksum does not match (%s instead of %s)\n",
554                       $file_checksum,
555                       $checksum->{'value'},
556                       $checksum->{'type'});
557                return 0;
558            }
559        }
560        else
561        {
562            # The datafile does not contain a checksum to match against.
563            # Display the one that was calculated for the downloaded file so that
564            # it can be integrated manually into the data file.
565            printf("checksum not given, md5 of file is %s\n", $file_checksum);
566            $filename = File::Spec->catfile($ENV{'TARFILE_LOCATION'}, $file_checksum . "-" . $name);
567        }
568
569        rename($temporary_filename, $filename) || die "can not rename $temporary_filename to $filename";
570        return 1;
571    }
572    else
573    {
574        unlink($temporary_filename);
575        print "    download failed\n";
576        return 0;
577    }
578}
579
580
581
582
583=head3 CheckDownloadDestination ()
584
585    Make sure that the download destination $TARFILE_LOCATION does exist.  If
586    not, then the directory is created.
587
588=cut
589sub CheckDownloadDestination ()
590{
591    my $destination = $ENV{'TARFILE_LOCATION'};
592    die "ERROR: no destination defined! please set TARFILE_LOCATION!" if ($destination eq "");
593
594    if ( ! -d $destination)
595    {
596        File::Path::make_path($destination);
597        die "ERROR: can't create \$TARFILE_LOCATION" if  ! -d $destination;
598    }
599}
600
601
602
603
604=head3 ProvideSpecialTarball ($url,$name,$name_converter)
605
606    A few tarballs need special handling.  That is done here.
607
608=cut
609sub ProvideSpecialTarball ($$$)
610{
611    my $url = shift;
612    my $name = shift;
613    my $name_converter = shift;
614
615    return unless defined $url && $url ne "";
616
617    # See if we can find the executable.
618    my ($SOLARENV,$OUTPATH,$EXEEXT) =  ($ENV{'SOLARENV'},$ENV{'OUTPATH'},$ENV{'EXEEXT'});
619    $SOLARENV = "" unless defined $SOLARENV;
620    $OUTPATH = "" unless defined $OUTPATH;
621    $EXEEXT = "" unless defined $EXEEXT;
622    if (-x File::Spec->catfile($SOLARENV, $OUTPATH, "bin", $name.$EXEEXT))
623    {
624        print "found $name executable\n";
625        return;
626    }
627
628    # Download the source from the URL.
629    my $basename = basename(URI->new($url)->path());
630    die unless defined $basename;
631
632    if (defined $name_converter)
633    {
634        $basename = &{$name_converter}($basename);
635    }
636
637    # Has the source tar ball already been downloaded?
638    my @candidates = glob(File::Spec->catfile($ENV{'TARFILE_LOCATION'}, "*-" . $basename));
639    if (scalar @candidates > 0)
640    {
641        # Yes.
642        print "$basename exists\n";
643        return;
644    }
645    else
646    {
647        # No, download it.
648        print "downloading $basename\n";
649        DownloadFile($basename, $url, undef);
650    }
651}
652
653
654
655
656
657# The main() functionality.
658
659die "usage: $0 <data-file-name>" if scalar @ARGV != 1;
660my $data_file = $ARGV[0];
661CheckDownloadDestination();
662ProcessDataFile($data_file);
663ProvideSpecialTarball($ENV{'DMAKE_URL'}, "dmake", undef);
664ProvideSpecialTarball(
665    $ENV{'EPM_URL'},
666    "epm",
667    sub{$_[0]=~s/-source//; return $_[0]});
668