#!/usr/bin/gawk -f
# *************************************************************
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# *************************************************************
# Usage: gawk -f currency-check.awk *.xml
# Check any
# <FormatCode>...[$xxx-...]...</FormatCode>
# against every
# <CurrencySymbol>xxx</CurrencySymbol>
# definition of the same XML file and output symbols if no match was found.
# For formatindex="12" to formatindex="15" and for formatindex="17" it is
# checked if the used currency symbol is the usedInCompatibleFormatCodes
# currency symbol as it is needed by the number formatter.
# Also generates output if the generic currency symbol (UTF8 string 0xC2A4)
# is used instead of a real currency symbol.
# Author: Eike Rathke <er@openoffice.org>

BEGIN {
    file = ""
}


# A new input file starts: report on the previous one, then reset all
# per-file state.
file != FILENAME {
    if ( file != "" )
        checkIt()
    file = FILENAME
    line = 0
    nFormats = 0
    nCurrencies = 0
    bFormatAuto = 0
    sReplaceFrom = ""
    sReplaceTo = ""
    sMatchReplace = ""
    sRefCurrencyFromLocale = ""
    crlf = 0
    # Resetting the counters alone is not enough: a <Currency> element that
    # lacks e.g. <CurrencyID> would otherwise silently pick up the entry
    # stored at the same index while processing the previous file.
    delete Formats
    delete FormatLine
    delete FormatAuto
    delete Symbols
    delete SymbolLine
    delete SymbolDefault
    delete SymbolCompati
    delete IDs
    delete IDLine
    delete BankSymbols
    delete BankSymbolLine
}

{
    ++line
    # If run under Unix a CrLf spoils ...$ line end checks. DOS line endings
    # are boo anyways.
    if ( /\x0D$/ )
    {
        print "Error: not Unix line ending in line " line
        crlf = 1
        exit(1)
    }
    if ( $1 ~ /^<LC_FORMAT(>|$)/ )
    {
        if ( $0 ~ /replaceFrom="\[CURRENCY\]"/ )
        {
            sReplaceFrom = "\\[CURRENCY\\]"
            # Allow leading blanks, consistent with the literal <FormatCode>
            # pattern below; otherwise indented format codes that use the
            # [CURRENCY] placeholder are never checked.
            sMatchReplace = "^[[:blank:]]*<FormatCode>.*" sReplaceFrom
        }
        for ( j=2; j<=NF; ++j )
        {
            if ( $j ~ /^replaceTo="/ )
                sReplaceTo = attrValue( $j, 11 )    # length of replaceTo="
        }
    }
    else if ( $1 ~ /^<FormatElement(>|$)/ )
    {
        if ( $0 ~ /usage="CURRENCY"/ )
        {
            # Format indices 12..15 and 17 must use the
            # usedInCompatibleFormatCodes currency symbol.
            if ( $0 ~ /formatindex="1[23457]"/ )
                bFormatAuto = 1
            else
                bFormatAuto = 0
        }
    }
    else if ( $0 ~ /^[[:blank:]]*<FormatCode>.*\[\$.*-[0-9a-fA-F]+\]/ ||
            (sMatchReplace && $0 ~ sMatchReplace ) )
    {
        if ( sReplaceFrom )
            gsub( sReplaceFrom, sReplaceTo )
        split( $0, arr, /<|>/ )
        # arr[3] is the element content. Splitting it at "[$" and "-hex]"
        # leaves the currency symbols and the surrounding format pieces.
        nParts = split( arr[3], code, /(\[\$)|(-[0-9a-fA-F]+\])/ )
        # Indexed loop instead of "for ( j in code )": the traversal order
        # of for-in is unspecified, which made the report order arbitrary.
        for ( k=1; k<=nParts; ++k )
        {
            # Pieces containing '#', '0' or '[NatNum' are format code, not
            # currency symbols.
            if ( code[k] != "" && code[k] !~ /#|0|\[NatNum/ )
            {
                FormatLine[nFormats] = file " line " line
                FormatAuto[nFormats] = bFormatAuto
                Formats[nFormats++] = code[k]
            }
        }
        bFormatAuto = 0
    }
    else if ( $1 ~ /^<LC_CURRENCY(>|$)/ )
    {
        for ( j=2; j<=NF; ++j )
        {
            if ( $j ~ /^ref="/ )
                loadRefCurrencies( attrValue( $j, 5 ) )    # length of ref="
        }
    }
    else
        getCurrencyParams()
}


END {
    if ( file != "" && !crlf )
        checkIt()
}


# Return the value of an attribute field of the form name="value", optionally
# followed by '>' or '/>'.
# field:     the whitespace separated field holding the attribute
# prefixLen: length of the leading  name="  part
# Assumes the value itself contains no blanks, as it has to fit in one field.
function attrValue( field, prefixLen,    l ) {
    l = prefixLen + 1       # the prefix plus the closing quote
    if ( field ~ />$/ )
        ++l
    if ( field ~ /\/>$/ )
        ++l
    return substr( field, prefixLen + 1, length(field) - l )
}


# Read the file given by the global 'file' and feed each line to
# getCurrencyParams(). Leaves the number of lines read in the global 'line',
# which getCurrencyParams() uses for its location strings.
function readRefFile() {
    line = 0
    while ( (getline <file) > 0 )
    {
        ++line
        getCurrencyParams()
    }
    close( file )
}


# Collect the currency definitions of the locale that an
# <LC_CURRENCY ref="..."> element refers to, as if they were defined in the
# current file.
# locale: value of the ref attribute, the file read is locale ".xml"
function loadRefCurrencies( locale,    savedRecord, oldfile, oldline, dir ) {
    # The plain getline below replaces $0 and NF, but the caller is still
    # looping over the fields of the <LC_CURRENCY> line, so remember the
    # record and restore it afterwards.
    savedRecord = $0
    oldfile = file
    oldline = line
    sRefCurrencyFromLocale = oldfile
    # Look next to the referencing file first, so the script also works when
    # not run from within the locale data directory.
    dir = oldfile
    sub( /[^\/]*$/, "", dir )
    file = dir locale ".xml"
    readRefFile()
    if ( !line && dir != "" )
    {
        # Fall back to the current directory.
        file = locale ".xml"
        readRefFile()
    }
    if ( !line )
        print "ref locale not available: " file \
            " (from " oldfile " line " oldline ")"
    file = oldfile
    line = oldline
    sRefCurrencyFromLocale = ""
    $0 = savedRecord
}


# Return "<file> line <line>" for the record being parsed, with a note
# appended if it is read on behalf of a referencing locale.
function currentLocation() {
    if ( sRefCurrencyFromLocale )
        return file " line " line \
            " (referenced from " sRefCurrencyFromLocale ")"
    return file " line " line
}


# Parse one line ($0) of an <LC_CURRENCY> section and store what it defines
# at index nCurrencies; </Currency> finishes the entry and advances the index.
function getCurrencyParams(    arr ) {
    # Assumes that each element is on a line on its own!
    if ( $1 ~ /^<Currency(>|$)/ )
    {
        if ( $0 ~ /default="true"/ )
            SymbolDefault[nCurrencies] = 1
        else
            SymbolDefault[nCurrencies] = 0
        if ( $0 ~ /usedInCompatibleFormatCodes="true"/ )
            SymbolCompati[nCurrencies] = 1
        else
            SymbolCompati[nCurrencies] = 0
    }
    else if ( $0 ~ /^[[:blank:]]*<CurrencyID>/ )
    {
        split( $0, arr, /<|>/ )
        IDLine[nCurrencies] = currentLocation()
        IDs[nCurrencies] = arr[3]
    }
    else if ( $0 ~ /^[[:blank:]]*<CurrencySymbol>/ )
    {
        split( $0, arr, /<|>/ )
        SymbolLine[nCurrencies] = currentLocation()
        Symbols[nCurrencies] = arr[3]
    }
    else if ( $0 ~ /^[[:blank:]]*<BankSymbol>/ )
    {
        split( $0, arr, /<|>/ )
        BankSymbolLine[nCurrencies] = currentLocation()
        BankSymbols[nCurrencies] = arr[3]
    }
    else if ( $1 ~ /^<\/Currency>/ )
    {
        ++nCurrencies
    }
}


# Check everything collected for the current file and print the findings,
# followed by an empty line if there were any.
function checkIt(    bad, j, state, bDef, bHasDefault, bHasCompati ) {
    bad = 0
    for ( j=0; j<nFormats; ++j )
    {
        state = FormatInSymbol( Formats[j] )
        if ( Formats[j] == "\xc2\xa4" )
        {
            bad = 1
            print "    bad: `" Formats[j] "'   (" FormatLine[j] ")"
        }
        else if ( state == 0 )
        {
            bad = 1
            print "unknown: `" Formats[j] "'   (" FormatLine[j] ")"
        }
        else if ( FormatAuto[j] && state < 2 )
        {
            bad = 1
            print "badauto: `" Formats[j] "'   (" FormatLine[j] ")"
        }
    }
    if ( bad )
    {
        # Some format code was flagged: list every currency definition so
        # the mismatch can be spotted.
        for ( j=0; j<nCurrencies; ++j )
        {
            bDef = 0
            if ( Symbols[j] == "\xc2\xa4" )
                print "def bad: `" Symbols[j] "'   (" SymbolLine[j] ")"
            if ( SymbolDefault[j] )
            {
                bDef = 1
                print "default: `" Symbols[j] "'   (" SymbolLine[j] ")"
            }
            if ( SymbolCompati[j] )
            {
                bDef = 1
                print "compati: `" Symbols[j] "'   (" SymbolLine[j] ")"
            }
            if ( !bDef )
                print "defined: `" Symbols[j] "'   (" SymbolLine[j] ")"
        }
    }
    else
    {
        # Format codes are fine: check that there is exactly one default
        # and exactly one usedInCompatibleFormatCodes currency.
        bHasDefault = 0
        bHasCompati = 0
        for ( j=0; j<nCurrencies; ++j )
        {
            if ( Symbols[j] == "\xc2\xa4" )
            {
                bad = 1
                print "def bad: `" Symbols[j] "'   (" SymbolLine[j] ")"
            }
            if ( SymbolDefault[j] )
            {
                if ( !bHasDefault )
                    bHasDefault = 1
                else
                {
                    bad = 1
                    print "dupe default: `" Symbols[j] "'   (" SymbolLine[j] ")"
                }
            }
            if ( SymbolCompati[j] )
            {
                if ( !bHasCompati )
                    bHasCompati = 1
                else
                {
                    bad = 1
                    print "dupe compati: `" Symbols[j] "'   (" SymbolLine[j] ")"
                }
            }
        }
        if ( !bHasDefault )
        {
            bad = 1
            print "  no default: (" file ")"
        }
        if ( !bHasCompati )
        {
            bad = 1
            print "  no compati: (" file ")"
        }
    }
    for ( j=0; j<nCurrencies; ++j )
    {
        # Check if CurrencyID at least resembles some ISO 4217 code.
        # The only exception is zh_MO that had an erroneous original data set
        # with BankSymbol="P" (stored as ISO code in documents, hence copied to
        # CurrencyID now) and needs that entry for legacy documents.
        # There is a strange bug in gawk 3.1.4 that does a match of [A-Z] on
        # lower case except 'a', regardless of IGNORECASE setting, hence this
        # ugly notation. [[:upper:]] wouldn't be correct since we want only
        # ASCII to match.
        # The file is matched by its base name so that the exception also
        # applies if the script was given a path to zh_MO.xml.
        if ( IDs[j] !~ /^[ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ]$/ \
              && !(file ~ /(^|\/)zh_MO\.xml$/ && IDs[j] == "P") )
        {
            bad = 1
            print "no ISO 4217 code: `" IDs[j] "'   (" IDLine[j] ")"
        }
        # CurrencyID should equal BankSymbol for now.
        if ( IDs[j] != BankSymbols[j] )
        {
            bad = 1
            print "not equal: CurrencyID `" IDs[j] "' != BankSymbol `" BankSymbols[j] \
                  "'   (" IDLine[j] " and " BankSymbolLine[j] ")"
        }
    }
    if ( bad )
        print ""
}


# Look up a currency symbol used in a format code among the defined
# currency symbols.
# Returns 0 if the symbol is not defined, 1 if it is defined, and 2 if it is
# the symbol of a usedInCompatibleFormatCodes currency.
function FormatInSymbol( format,    state, nSym ) {
    state = 0
    for ( nSym=0; nSym<nCurrencies; ++nSym )
    {
        if ( format == Symbols[nSym] )
        {
            # Two currencies can have the same symbol (e.g. az_AZ.xml 'man.'
            # for AZM and AZN), continue to lookup if the match isn't the
            # compatible one.
            if ( SymbolCompati[nSym] )
                return 2
            else
                state = 1
        }
    }
    return state
}

# vim: ts=4 sw=4 expandtab