1*cdf0e10cSrcweir#************************************************************************* 2*cdf0e10cSrcweir# 3*cdf0e10cSrcweir# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4*cdf0e10cSrcweir# 5*cdf0e10cSrcweir# Copyright 2000, 2010 Oracle and/or its affiliates. 6*cdf0e10cSrcweir# 7*cdf0e10cSrcweir# OpenOffice.org - a multi-platform office productivity suite 8*cdf0e10cSrcweir# 9*cdf0e10cSrcweir# This file is part of OpenOffice.org. 10*cdf0e10cSrcweir# 11*cdf0e10cSrcweir# OpenOffice.org is free software: you can redistribute it and/or modify 12*cdf0e10cSrcweir# it under the terms of the GNU Lesser General Public License version 3 13*cdf0e10cSrcweir# only, as published by the Free Software Foundation. 14*cdf0e10cSrcweir# 15*cdf0e10cSrcweir# OpenOffice.org is distributed in the hope that it will be useful, 16*cdf0e10cSrcweir# but WITHOUT ANY WARRANTY; without even the implied warranty of 17*cdf0e10cSrcweir# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18*cdf0e10cSrcweir# GNU Lesser General Public License version 3 for more details 19*cdf0e10cSrcweir# (a copy is included in the LICENSE file that accompanied this code). 20*cdf0e10cSrcweir# 21*cdf0e10cSrcweir# You should have received a copy of the GNU Lesser General Public License 22*cdf0e10cSrcweir# version 3 along with OpenOffice.org. If not, see 23*cdf0e10cSrcweir# <http://www.openoffice.org/license.html> 24*cdf0e10cSrcweir# for a copy of the LGPLv3 License. 25*cdf0e10cSrcweir# 26*cdf0e10cSrcweir#************************************************************************* 27*cdf0e10cSrcweir 28*cdf0e10cSrcweir#************************************************************************* 29*cdf0e10cSrcweir# 30*cdf0e10cSrcweir# GenInfoParser - Perl extension for parsing general info databases 31*cdf0e10cSrcweir# 32*cdf0e10cSrcweir# usage: see below 33*cdf0e10cSrcweir# 34*cdf0e10cSrcweir#************************************************************************* 35*cdf0e10cSrcweir 36*cdf0e10cSrcweirpackage GenInfoParser; 37*cdf0e10cSrcweir 38*cdf0e10cSrcweiruse strict; 39*cdf0e10cSrcweir 40*cdf0e10cSrcweiruse Carp; 41*cdf0e10cSrcweir 42*cdf0e10cSrcweir##### profiling ##### 43*cdf0e10cSrcweir# use Benchmark; 44*cdf0e10cSrcweir 45*cdf0e10cSrcweir##### ctor ##### 46*cdf0e10cSrcweir 47*cdf0e10cSrcweirsub new { 48*cdf0e10cSrcweir my $proto = shift; 49*cdf0e10cSrcweir my $class = ref($proto) || $proto; 50*cdf0e10cSrcweir my $self = {}; 51*cdf0e10cSrcweir $self->{'LIST'} = undef; 52*cdf0e10cSrcweir $self->{'DATA'} = {}; 53*cdf0e10cSrcweir bless ($self, $class); 54*cdf0e10cSrcweir return $self; 55*cdf0e10cSrcweir} 56*cdf0e10cSrcweir 57*cdf0e10cSrcweir##### methods ##### 58*cdf0e10cSrcweir 59*cdf0e10cSrcweirsub load_list 60*cdf0e10cSrcweir{ 61*cdf0e10cSrcweir # load list into memory 62*cdf0e10cSrcweir my $self = shift; 63*cdf0e10cSrcweir my $list_file = shift; 64*cdf0e10cSrcweir 65*cdf0e10cSrcweir if ( $self->parse_list($list_file) ) { 66*cdf0e10cSrcweir return 1; 67*cdf0e10cSrcweir } 68*cdf0e10cSrcweir return 0; 69*cdf0e10cSrcweir} 70*cdf0e10cSrcweir 71*cdf0e10cSrcweirsub get_keys 72*cdf0e10cSrcweir{ 73*cdf0e10cSrcweir # return a sorted list of keys, the sorting is case insensitive 74*cdf0e10cSrcweir my $self = shift; 75*cdf0e10cSrcweir my $access_path = shift; 76*cdf0e10cSrcweir 77*cdf0e10cSrcweir my ($key, $value, $sub_data_ref) = $self->walk_accesspath($access_path); 78*cdf0e10cSrcweir 79*cdf0e10cSrcweir my @keys = (); 80*cdf0e10cSrcweir if ( $sub_data_ref ) { 81*cdf0e10cSrcweir my @normalized_keys = keys %$sub_data_ref; 82*cdf0e10cSrcweir foreach my $normalized_key (sort keys %$sub_data_ref) { 83*cdf0e10cSrcweir push(@keys, $$sub_data_ref{$normalized_key}[0]); 84*cdf0e10cSrcweir } 85*cdf0e10cSrcweir } elsif ( $value ) { 86*cdf0e10cSrcweir chomp $value; 87*cdf0e10cSrcweir push @keys, ($value); 88*cdf0e10cSrcweir } 89*cdf0e10cSrcweir return @keys; 90*cdf0e10cSrcweir} 91*cdf0e10cSrcweir 92*cdf0e10cSrcweirsub get_key 93*cdf0e10cSrcweir{ 94*cdf0e10cSrcweir # returns the key corresponding to the access_path 95*cdf0e10cSrcweir my $self = shift; 96*cdf0e10cSrcweir my $access_path = shift; 97*cdf0e10cSrcweir 98*cdf0e10cSrcweir my ($key, $value, $sub_data_ref) = $self->walk_accesspath($access_path); 99*cdf0e10cSrcweir return undef if !$key; 100*cdf0e10cSrcweir return $key; 101*cdf0e10cSrcweir} 102*cdf0e10cSrcweir 103*cdf0e10cSrcweirsub get_value 104*cdf0e10cSrcweir{ 105*cdf0e10cSrcweir # returns the value corresponding to the access_path 106*cdf0e10cSrcweir my $self = shift; 107*cdf0e10cSrcweir my $access_path = shift; 108*cdf0e10cSrcweir 109*cdf0e10cSrcweir my ($key, $value, $sub_data_ref) = $self->walk_accesspath($access_path); 110*cdf0e10cSrcweir return undef if !$key; 111*cdf0e10cSrcweir $value = "" if !defined($value); 112*cdf0e10cSrcweir # trim line ends 113*cdf0e10cSrcweir $value =~ tr/\r\n//d; 114*cdf0e10cSrcweir # trim trailing whitespace 115*cdf0e10cSrcweir $value =~ s/\s+$//; 116*cdf0e10cSrcweir return $value; 117*cdf0e10cSrcweir} 118*cdf0e10cSrcweir 119*cdf0e10cSrcweir##### private methods ##### 120*cdf0e10cSrcweir 121*cdf0e10cSrcweirsub parse_list 122*cdf0e10cSrcweir{ 123*cdf0e10cSrcweir # parse complete list 124*cdf0e10cSrcweir my $self = shift; 125*cdf0e10cSrcweir my $list_file = shift; 126*cdf0e10cSrcweir my @list_data; 127*cdf0e10cSrcweir 128*cdf0e10cSrcweir return 0 if ! -r $list_file; 129*cdf0e10cSrcweir 130*cdf0e10cSrcweir open(FILE, "<$list_file") or croak("can't open $list_file: $!"); 131*cdf0e10cSrcweir # my $t0 = new Benchmark; 132*cdf0e10cSrcweir $self->parse_block(\*FILE, $self->{'DATA'}); 133*cdf0e10cSrcweir # my $t1 = new Benchmark; 134*cdf0e10cSrcweir # print STDERR "parsing $list_file took: ", timestr(timediff($t1, $t0)), "\n"; 135*cdf0e10cSrcweir close(FILE); 136*cdf0e10cSrcweir} 137*cdf0e10cSrcweir 138*cdf0e10cSrcweirsub parse_block 139*cdf0e10cSrcweir{ 140*cdf0e10cSrcweir # parse each sub block and place it in a hash 141*cdf0e10cSrcweir # used data structure: 142*cdf0e10cSrcweir # $hash{$normalized_key} = [ $key, $value, 0 | $sub_hash_ref ] 143*cdf0e10cSrcweir my $self = shift; 144*cdf0e10cSrcweir my $glob_ref = shift; 145*cdf0e10cSrcweir my $data_ref = shift; 146*cdf0e10cSrcweir 147*cdf0e10cSrcweir my $current_key = 0; 148*cdf0e10cSrcweir my $line; 149*cdf0e10cSrcweir while( $line = <$glob_ref> ) { 150*cdf0e10cSrcweir # this is the inner loop, any additional pattern matching will 151*cdf0e10cSrcweir # have a notable affect on runtime behavior 152*cdf0e10cSrcweir # clean up of $value is done in get_value() 153*cdf0e10cSrcweir my ($key, $value) = split(' ', $line, 2); 154*cdf0e10cSrcweir next if !$key; # skip empty lines 155*cdf0e10cSrcweir my $chr = substr($key, 0, 1); 156*cdf0e10cSrcweir next if $chr eq '#'; # skip comment lines 157*cdf0e10cSrcweir last if $chr eq '}'; # return from block; 158*cdf0e10cSrcweir if ( $chr eq '{' ) { 159*cdf0e10cSrcweir if ( !$current_key ) { 160*cdf0e10cSrcweir croak("unexpected block start"); 161*cdf0e10cSrcweir } 162*cdf0e10cSrcweir else { 163*cdf0e10cSrcweir # create empty hash and start sub block parse 164*cdf0e10cSrcweir $$data_ref{$current_key}[2] = {}; 165*cdf0e10cSrcweir $self->parse_block($glob_ref, $$data_ref{$current_key}[2]); 166*cdf0e10cSrcweir next; 167*cdf0e10cSrcweir } 168*cdf0e10cSrcweir } 169*cdf0e10cSrcweir # sanity check 170*cdf0e10cSrcweir croak("key $key is not well formed") if $key =~ /\//; 171*cdf0e10cSrcweir # normalize key for hash lookup 172*cdf0e10cSrcweir $current_key = lc($key); 173*cdf0e10cSrcweir # but we have to keep the original - not normalized - key, too 174*cdf0e10cSrcweir $$data_ref{($current_key)} = [$key, $value, 0]; 175*cdf0e10cSrcweir } 176*cdf0e10cSrcweir} 177*cdf0e10cSrcweir 178*cdf0e10cSrcweirsub walk_accesspath 179*cdf0e10cSrcweir{ 180*cdf0e10cSrcweir # returns the key, value and sub_data_ref which 181*cdf0e10cSrcweir # corresponds to the access_path 182*cdf0e10cSrcweir 183*cdf0e10cSrcweir my $self = shift; 184*cdf0e10cSrcweir my $access_path = shift; 185*cdf0e10cSrcweir 186*cdf0e10cSrcweir my $sub_data_ref = $self->{'DATA'}; 187*cdf0e10cSrcweir 188*cdf0e10cSrcweir if ( $access_path ) { 189*cdf0e10cSrcweir my $lookup_ref = 0; 190*cdf0e10cSrcweir # normalize key 191*cdf0e10cSrcweir $access_path = lc($access_path); 192*cdf0e10cSrcweir my @key_sequence = split(/\//, $access_path); 193*cdf0e10cSrcweir foreach my $key_element (@key_sequence) { 194*cdf0e10cSrcweir # at least one more key element, but no sub_hash, accesspath invalid 195*cdf0e10cSrcweir return () if !$sub_data_ref; 196*cdf0e10cSrcweir $lookup_ref = $$sub_data_ref{$key_element}; 197*cdf0e10cSrcweir # lookup failed, accesspath invalid 198*cdf0e10cSrcweir return () if !defined($lookup_ref); 199*cdf0e10cSrcweir # we've got a valid key 200*cdf0e10cSrcweir $sub_data_ref = $$lookup_ref[2]; 201*cdf0e10cSrcweir } 202*cdf0e10cSrcweir return ($$lookup_ref[0], $$lookup_ref[1], $sub_data_ref); 203*cdf0e10cSrcweir } 204*cdf0e10cSrcweir else { 205*cdf0e10cSrcweir # empty access path is only vlaid for getting top level key list 206*cdf0e10cSrcweir return ( undef, undef, $sub_data_ref ); 207*cdf0e10cSrcweir } 208*cdf0e10cSrcweir} 209*cdf0e10cSrcweir 210*cdf0e10cSrcweir##### finish ##### 211*cdf0e10cSrcweir 212*cdf0e10cSrcweir1; # needed by use or require 213*cdf0e10cSrcweir 214*cdf0e10cSrcweir__END__ 215*cdf0e10cSrcweir 216*cdf0e10cSrcweir=head1 NAME 217*cdf0e10cSrcweir 218*cdf0e10cSrcweirGenInfoParser - Perl extension for parsing general info databases 219*cdf0e10cSrcweir 220*cdf0e10cSrcweir=head1 SYNOPSIS 221*cdf0e10cSrcweir 222*cdf0e10cSrcweir # example that will load a general info database called 'stand.lst' 223*cdf0e10cSrcweir 224*cdf0e10cSrcweir use GenInfoParser; 225*cdf0e10cSrcweir 226*cdf0e10cSrcweir # Create a new instance of the parser: 227*cdf0e10cSrcweir $a = GenInfoParser->new(); 228*cdf0e10cSrcweir 229*cdf0e10cSrcweir # Load the database into the parser: 230*cdf0e10cSrcweir $a->load_list('ssrc633.ini'); 231*cdf0e10cSrcweir 232*cdf0e10cSrcweir # get top level keys from database 233*cdf0e10cSrcweir @top_level_keys = $a->get_keys(); 234*cdf0e10cSrcweir 235*cdf0e10cSrcweir # get sub list keys 236*cdf0e10cSrcweir @sub_list_keys = $a->get_keys('src633/Drives/o:/Projects'); 237*cdf0e10cSrcweir 238*cdf0e10cSrcweir # get key/value pair 239*cdf0e10cSrcweir $key = $a->get_key('src633/Comment/build'); 240*cdf0e10cSrcweir $value = $a->get_value('src633/Comment/build'); 241*cdf0e10cSrcweir 242*cdf0e10cSrcweir=head1 DESCRIPTION 243*cdf0e10cSrcweir 244*cdf0e10cSrcweirGenInfoParser is a perl extension to load and parse General Info Databses. 245*cdf0e10cSrcweirIt uses a simple object oriented interface to retrieve the information stored 246*cdf0e10cSrcweirin the database. 247*cdf0e10cSrcweir 248*cdf0e10cSrcweirMethods: 249*cdf0e10cSrcweir 250*cdf0e10cSrcweirGenInfoParser::new() 251*cdf0e10cSrcweir 252*cdf0e10cSrcweirCreates a new instance of the parser. Can't fail. 253*cdf0e10cSrcweir 254*cdf0e10cSrcweir 255*cdf0e10cSrcweirGenInfoParser::load_list($database) 256*cdf0e10cSrcweir 257*cdf0e10cSrcweirLoads and parses $database. Returns 1 on success and 0 on failure 258*cdf0e10cSrcweir 259*cdf0e10cSrcweir 260*cdf0e10cSrcweirGenInfoParser::get_keys($path) 261*cdf0e10cSrcweir 262*cdf0e10cSrcweirReturns a sorted list of keys from the path $path. Returns an emtpy list if $path 263*cdf0e10cSrcweirhas no sublist. If there is no $path spcified, the method will return the 264*cdf0e10cSrcweirprimary key list. $path can be specified case insensitive. Sorting is done case 265*cdf0e10cSrcweirinsensitive. 266*cdf0e10cSrcweir 267*cdf0e10cSrcweirGenInfoParser::get_key($path) 268*cdf0e10cSrcweir 269*cdf0e10cSrcweirReturns the key to $path or 'undef' if an invalid path is given. 270*cdf0e10cSrcweirExample: $path = 'src633/comment/build' will return 'Build' as key. 271*cdf0e10cSrcweirNote: $path can be specified case insensitive, but the returned key will 272*cdf0e10cSrcweirhave the exact case as in the database. 273*cdf0e10cSrcweir 274*cdf0e10cSrcweirGenInfoParser::get_value($path) 275*cdf0e10cSrcweir 276*cdf0e10cSrcweirReturns the value to $path or 'undef' is invalid path is given. 277*cdf0e10cSrcweir 278*cdf0e10cSrcweir 279*cdf0e10cSrcweir=head2 EXPORT 280*cdf0e10cSrcweir 281*cdf0e10cSrcweirGenInfoParser::new() 282*cdf0e10cSrcweirGenInfoParser::load_list($database) 283*cdf0e10cSrcweirGenInfoParser::get_keys($path) 284*cdf0e10cSrcweirGenInfoParser::get_key($path) 285*cdf0e10cSrcweirGenInfoParser::get_value($path) 286*cdf0e10cSrcweir 287*cdf0e10cSrcweir 288*cdf0e10cSrcweir=head1 AUTHOR 289*cdf0e10cSrcweir 290*cdf0e10cSrcweirJens-Heiner Rechtien, rechtien@sun.com 291*cdf0e10cSrcweir 292*cdf0e10cSrcweir=head1 SEE ALSO 293*cdf0e10cSrcweir 294*cdf0e10cSrcweirperl(1). 295*cdf0e10cSrcweir 296*cdf0e10cSrcweir=cut 297