xref: /trunk/main/solenv/bin/modules/GenInfoParser.pm (revision cdf0e10c)
1*cdf0e10cSrcweir#*************************************************************************
2*cdf0e10cSrcweir#
3*cdf0e10cSrcweir# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir#
5*cdf0e10cSrcweir# Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir#
7*cdf0e10cSrcweir# OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir#
9*cdf0e10cSrcweir# This file is part of OpenOffice.org.
10*cdf0e10cSrcweir#
11*cdf0e10cSrcweir# OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir# it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir# only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir#
15*cdf0e10cSrcweir# OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir# but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir# GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir# (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir#
21*cdf0e10cSrcweir# You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir# version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir# <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir# for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir#
26*cdf0e10cSrcweir#*************************************************************************
27*cdf0e10cSrcweir
28*cdf0e10cSrcweir#*************************************************************************
29*cdf0e10cSrcweir#
30*cdf0e10cSrcweir# GenInfoParser - Perl extension for parsing general info databases
31*cdf0e10cSrcweir#
32*cdf0e10cSrcweir# usage: see below
33*cdf0e10cSrcweir#
34*cdf0e10cSrcweir#*************************************************************************
35*cdf0e10cSrcweir
36*cdf0e10cSrcweirpackage GenInfoParser;
37*cdf0e10cSrcweir
38*cdf0e10cSrcweiruse strict;
39*cdf0e10cSrcweir
40*cdf0e10cSrcweiruse Carp;
41*cdf0e10cSrcweir
42*cdf0e10cSrcweir#####  profiling #####
43*cdf0e10cSrcweir# use Benchmark;
44*cdf0e10cSrcweir
45*cdf0e10cSrcweir##### ctor #####
46*cdf0e10cSrcweir
47*cdf0e10cSrcweirsub new {
48*cdf0e10cSrcweir    my $proto = shift;
49*cdf0e10cSrcweir    my $class = ref($proto) || $proto;
50*cdf0e10cSrcweir    my $self = {};
51*cdf0e10cSrcweir    $self->{'LIST'} = undef;
52*cdf0e10cSrcweir    $self->{'DATA'} = {};
53*cdf0e10cSrcweir    bless ($self, $class);
54*cdf0e10cSrcweir    return $self;
55*cdf0e10cSrcweir}
56*cdf0e10cSrcweir
57*cdf0e10cSrcweir##### methods #####
58*cdf0e10cSrcweir
59*cdf0e10cSrcweirsub load_list
60*cdf0e10cSrcweir{
61*cdf0e10cSrcweir    # load list into memory
62*cdf0e10cSrcweir    my $self        = shift;
63*cdf0e10cSrcweir    my $list_file   = shift;
64*cdf0e10cSrcweir
65*cdf0e10cSrcweir    if ( $self->parse_list($list_file) ) {
66*cdf0e10cSrcweir        return 1;
67*cdf0e10cSrcweir    }
68*cdf0e10cSrcweir    return 0;
69*cdf0e10cSrcweir}
70*cdf0e10cSrcweir
71*cdf0e10cSrcweirsub get_keys
72*cdf0e10cSrcweir{
73*cdf0e10cSrcweir	# return a sorted list of keys, the sorting is case insensitive
74*cdf0e10cSrcweir    my $self        = shift;
75*cdf0e10cSrcweir    my $access_path = shift;
76*cdf0e10cSrcweir
77*cdf0e10cSrcweir    my ($key, $value, $sub_data_ref) = $self->walk_accesspath($access_path);
78*cdf0e10cSrcweir
79*cdf0e10cSrcweir    my @keys = ();
80*cdf0e10cSrcweir	if ( $sub_data_ref ) {
81*cdf0e10cSrcweir		my @normalized_keys = keys %$sub_data_ref;
82*cdf0e10cSrcweir		foreach my $normalized_key (sort keys %$sub_data_ref) {
83*cdf0e10cSrcweir			push(@keys, $$sub_data_ref{$normalized_key}[0]);
84*cdf0e10cSrcweir        }
85*cdf0e10cSrcweir    } elsif ( $value ) {
86*cdf0e10cSrcweir        chomp $value;
87*cdf0e10cSrcweir        push @keys, ($value);
88*cdf0e10cSrcweir    }
89*cdf0e10cSrcweir    return @keys;
90*cdf0e10cSrcweir}
91*cdf0e10cSrcweir
92*cdf0e10cSrcweirsub get_key
93*cdf0e10cSrcweir{
94*cdf0e10cSrcweir    # returns the key corresponding to the access_path
95*cdf0e10cSrcweir    my $self        = shift;
96*cdf0e10cSrcweir    my $access_path = shift;
97*cdf0e10cSrcweir
98*cdf0e10cSrcweir    my ($key, $value, $sub_data_ref) = $self->walk_accesspath($access_path);
99*cdf0e10cSrcweir    return undef if !$key;
100*cdf0e10cSrcweir    return $key;
101*cdf0e10cSrcweir}
102*cdf0e10cSrcweir
103*cdf0e10cSrcweirsub get_value
104*cdf0e10cSrcweir{
105*cdf0e10cSrcweir    # returns the value corresponding to the access_path
106*cdf0e10cSrcweir    my $self        = shift;
107*cdf0e10cSrcweir    my $access_path = shift;
108*cdf0e10cSrcweir
109*cdf0e10cSrcweir    my ($key, $value, $sub_data_ref) = $self->walk_accesspath($access_path);
110*cdf0e10cSrcweir    return undef if !$key;
111*cdf0e10cSrcweir	$value = "" if !defined($value);
112*cdf0e10cSrcweir    # trim line ends
113*cdf0e10cSrcweir    $value =~ tr/\r\n//d;
114*cdf0e10cSrcweir    # trim trailing whitespace
115*cdf0e10cSrcweir    $value =~ s/\s+$//;
116*cdf0e10cSrcweir    return $value;
117*cdf0e10cSrcweir}
118*cdf0e10cSrcweir
119*cdf0e10cSrcweir##### private methods #####
120*cdf0e10cSrcweir
121*cdf0e10cSrcweirsub parse_list
122*cdf0e10cSrcweir{
123*cdf0e10cSrcweir    # parse complete list
124*cdf0e10cSrcweir    my $self        = shift;
125*cdf0e10cSrcweir    my $list_file   = shift;
126*cdf0e10cSrcweir    my @list_data;
127*cdf0e10cSrcweir
128*cdf0e10cSrcweir    return 0 if ! -r $list_file;
129*cdf0e10cSrcweir
130*cdf0e10cSrcweir    open(FILE, "<$list_file") or croak("can't open $list_file: $!");
131*cdf0e10cSrcweir    # my $t0 = new Benchmark;
132*cdf0e10cSrcweir    $self->parse_block(\*FILE, $self->{'DATA'});
133*cdf0e10cSrcweir    # my $t1 = new Benchmark;
134*cdf0e10cSrcweir    # print STDERR "parsing $list_file took: ", timestr(timediff($t1, $t0)), "\n";
135*cdf0e10cSrcweir    close(FILE);
136*cdf0e10cSrcweir}
137*cdf0e10cSrcweir
138*cdf0e10cSrcweirsub parse_block
139*cdf0e10cSrcweir{
140*cdf0e10cSrcweir	# parse each sub block and place it in a hash
141*cdf0e10cSrcweir	# used data structure:
142*cdf0e10cSrcweir	# $hash{$normalized_key} = [ $key, $value, 0 | $sub_hash_ref ]
143*cdf0e10cSrcweir    my $self        = shift;
144*cdf0e10cSrcweir    my $glob_ref    = shift;
145*cdf0e10cSrcweir    my $data_ref    = shift;
146*cdf0e10cSrcweir
147*cdf0e10cSrcweir    my $current_key = 0;
148*cdf0e10cSrcweir    my $line;
149*cdf0e10cSrcweir    while( $line = <$glob_ref> ) {
150*cdf0e10cSrcweir        # this is the inner loop, any additional pattern matching will
151*cdf0e10cSrcweir        # have a notable affect on runtime behavior
152*cdf0e10cSrcweir        # clean up of $value is done in get_value()
153*cdf0e10cSrcweir        my ($key, $value) = split(' ', $line, 2);
154*cdf0e10cSrcweir        next if !$key;                  # skip empty lines
155*cdf0e10cSrcweir        my $chr = substr($key, 0, 1);
156*cdf0e10cSrcweir        next if $chr eq '#';            # skip comment lines
157*cdf0e10cSrcweir        last if $chr eq '}';            # return from block;
158*cdf0e10cSrcweir        if ( $chr eq '{' ) {
159*cdf0e10cSrcweir            if ( !$current_key ) {
160*cdf0e10cSrcweir                croak("unexpected block start");
161*cdf0e10cSrcweir            }
162*cdf0e10cSrcweir            else {
163*cdf0e10cSrcweir				# create empty hash and start sub block parse
164*cdf0e10cSrcweir                $$data_ref{$current_key}[2] = {};
165*cdf0e10cSrcweir                $self->parse_block($glob_ref, $$data_ref{$current_key}[2]);
166*cdf0e10cSrcweir                next;
167*cdf0e10cSrcweir            }
168*cdf0e10cSrcweir        }
169*cdf0e10cSrcweir        # sanity check
170*cdf0e10cSrcweir        croak("key $key is not well formed") if $key =~ /\//;
171*cdf0e10cSrcweir		# normalize key for hash lookup
172*cdf0e10cSrcweir		$current_key = lc($key);
173*cdf0e10cSrcweir		# but we have to keep the original - not normalized - key, too
174*cdf0e10cSrcweir        $$data_ref{($current_key)} = [$key, $value, 0];
175*cdf0e10cSrcweir    }
176*cdf0e10cSrcweir}
177*cdf0e10cSrcweir
178*cdf0e10cSrcweirsub walk_accesspath
179*cdf0e10cSrcweir{
180*cdf0e10cSrcweir	# returns the key, value and sub_data_ref which
181*cdf0e10cSrcweir    # corresponds to the access_path
182*cdf0e10cSrcweir
183*cdf0e10cSrcweir    my $self        = shift;
184*cdf0e10cSrcweir    my $access_path = shift;
185*cdf0e10cSrcweir
186*cdf0e10cSrcweir    my $sub_data_ref = $self->{'DATA'};
187*cdf0e10cSrcweir
188*cdf0e10cSrcweir    if ( $access_path ) {
189*cdf0e10cSrcweir        my $lookup_ref = 0;
190*cdf0e10cSrcweir        # normalize key
191*cdf0e10cSrcweir        $access_path = lc($access_path);
192*cdf0e10cSrcweir        my @key_sequence = split(/\//, $access_path);
193*cdf0e10cSrcweir        foreach my $key_element (@key_sequence) {
194*cdf0e10cSrcweir            # at least one more key element, but no sub_hash, accesspath invalid
195*cdf0e10cSrcweir            return () if !$sub_data_ref;
196*cdf0e10cSrcweir			$lookup_ref = $$sub_data_ref{$key_element};
197*cdf0e10cSrcweir			# lookup failed, accesspath invalid
198*cdf0e10cSrcweir			return () if !defined($lookup_ref);
199*cdf0e10cSrcweir			# we've got a valid key
200*cdf0e10cSrcweir			$sub_data_ref = $$lookup_ref[2];
201*cdf0e10cSrcweir        }
202*cdf0e10cSrcweir	    return ($$lookup_ref[0], $$lookup_ref[1], $sub_data_ref);
203*cdf0e10cSrcweir    }
204*cdf0e10cSrcweir    else {
205*cdf0e10cSrcweir        # empty access path is only vlaid for getting top level key list
206*cdf0e10cSrcweir        return ( undef, undef, $sub_data_ref );
207*cdf0e10cSrcweir    }
208*cdf0e10cSrcweir}
209*cdf0e10cSrcweir
210*cdf0e10cSrcweir##### finish #####
211*cdf0e10cSrcweir
212*cdf0e10cSrcweir1; # needed by use or require
213*cdf0e10cSrcweir
214*cdf0e10cSrcweir__END__
215*cdf0e10cSrcweir
216*cdf0e10cSrcweir=head1 NAME
217*cdf0e10cSrcweir
218*cdf0e10cSrcweirGenInfoParser - Perl extension for parsing general info databases
219*cdf0e10cSrcweir
220*cdf0e10cSrcweir=head1 SYNOPSIS
221*cdf0e10cSrcweir
222*cdf0e10cSrcweir    # example that will load a general info database called 'stand.lst'
223*cdf0e10cSrcweir
224*cdf0e10cSrcweir    use GenInfoParser;
225*cdf0e10cSrcweir
226*cdf0e10cSrcweir    # Create a new instance of the parser:
227*cdf0e10cSrcweir    $a = GenInfoParser->new();
228*cdf0e10cSrcweir
229*cdf0e10cSrcweir    # Load the database into the parser:
230*cdf0e10cSrcweir    $a->load_list('ssrc633.ini');
231*cdf0e10cSrcweir
232*cdf0e10cSrcweir    # get top level keys from database
233*cdf0e10cSrcweir    @top_level_keys = $a->get_keys();
234*cdf0e10cSrcweir
235*cdf0e10cSrcweir    # get sub list keys
236*cdf0e10cSrcweir    @sub_list_keys = $a->get_keys('src633/Drives/o:/Projects');
237*cdf0e10cSrcweir
238*cdf0e10cSrcweir    # get key/value pair
239*cdf0e10cSrcweir    $key = $a->get_key('src633/Comment/build');
240*cdf0e10cSrcweir    $value = $a->get_value('src633/Comment/build');
241*cdf0e10cSrcweir
242*cdf0e10cSrcweir=head1 DESCRIPTION
243*cdf0e10cSrcweir
244*cdf0e10cSrcweirGenInfoParser is a perl extension to load and parse General Info Databses.
245*cdf0e10cSrcweirIt uses a simple object oriented interface to retrieve the information stored
246*cdf0e10cSrcweirin the database.
247*cdf0e10cSrcweir
248*cdf0e10cSrcweirMethods:
249*cdf0e10cSrcweir
250*cdf0e10cSrcweirGenInfoParser::new()
251*cdf0e10cSrcweir
252*cdf0e10cSrcweirCreates a new instance of the parser. Can't fail.
253*cdf0e10cSrcweir
254*cdf0e10cSrcweir
255*cdf0e10cSrcweirGenInfoParser::load_list($database)
256*cdf0e10cSrcweir
257*cdf0e10cSrcweirLoads and parses $database. Returns 1 on success and 0 on failure
258*cdf0e10cSrcweir
259*cdf0e10cSrcweir
260*cdf0e10cSrcweirGenInfoParser::get_keys($path)
261*cdf0e10cSrcweir
262*cdf0e10cSrcweirReturns a sorted list of keys from the path $path. Returns an emtpy list if $path
263*cdf0e10cSrcweirhas no sublist. If there is no $path spcified, the method will return the
264*cdf0e10cSrcweirprimary key list. $path can be specified case insensitive. Sorting is done case
265*cdf0e10cSrcweirinsensitive.
266*cdf0e10cSrcweir
267*cdf0e10cSrcweirGenInfoParser::get_key($path)
268*cdf0e10cSrcweir
269*cdf0e10cSrcweirReturns the key to $path or 'undef' if an invalid path is given.
270*cdf0e10cSrcweirExample: $path = 'src633/comment/build' will return 'Build' as key.
271*cdf0e10cSrcweirNote: $path can be specified case insensitive, but the returned key will
272*cdf0e10cSrcweirhave the exact case as in the database.
273*cdf0e10cSrcweir
274*cdf0e10cSrcweirGenInfoParser::get_value($path)
275*cdf0e10cSrcweir
276*cdf0e10cSrcweirReturns the value to $path or 'undef' is invalid path is given.
277*cdf0e10cSrcweir
278*cdf0e10cSrcweir
279*cdf0e10cSrcweir=head2 EXPORT
280*cdf0e10cSrcweir
281*cdf0e10cSrcweirGenInfoParser::new()
282*cdf0e10cSrcweirGenInfoParser::load_list($database)
283*cdf0e10cSrcweirGenInfoParser::get_keys($path)
284*cdf0e10cSrcweirGenInfoParser::get_key($path)
285*cdf0e10cSrcweirGenInfoParser::get_value($path)
286*cdf0e10cSrcweir
287*cdf0e10cSrcweir
288*cdf0e10cSrcweir=head1 AUTHOR
289*cdf0e10cSrcweir
290*cdf0e10cSrcweirJens-Heiner Rechtien, rechtien@sun.com
291*cdf0e10cSrcweir
292*cdf0e10cSrcweir=head1 SEE ALSO
293*cdf0e10cSrcweir
294*cdf0e10cSrcweirperl(1).
295*cdf0e10cSrcweir
296*cdf0e10cSrcweir=cut
297