#!/usr/bin/gawk -f
# *************************************************************
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# *************************************************************
# Usage: gawk -f currency-check.awk *.xml
# Check any
# <FormatCode>...[$xxx-...]...</FormatCode>
# against every
# <CurrencySymbol>xxx</CurrencySymbol>
# definition of the same XML file and output symbols if no match was found.
# For formatindex="12" to formatindex="15" and for formatindex="17" it is
# checked if the used currency symbol is the usedInCompatibleFormatCodes
# currency symbol as it is needed by the number formatter.
# Also generates output if the generic currency symbol (UTF8 string 0xC2A4)
# is used instead of a real currency symbol.
# Author: Eike Rathke <er@openoffice.org>

# Global state shared between the rules and functions below:
#   file, line              name and line number of the input being read
#   Formats[], FormatLine[], FormatAuto[]   (0 .. nFormats-1)
#                           currency symbols found in format codes, where
#                           they were found, and whether the format index
#                           requires the compatible currency symbol
#   IDs[], Symbols[], BankSymbols[], SymbolDefault[], SymbolCompati[]
#   IDLine[], SymbolLine[], BankSymbolLine[]   (0 .. nCurrencies-1)
#                           the currency definitions and their locations
#   sReplaceFrom, sReplaceTo, sMatchReplace
#                           placeholder substitution taken from <LC_FORMAT>
#   sRefCurrencyFromLocale  non-empty while a ref="..." locale file is read
#   crlf                    set once a CR line ending has been reported

BEGIN {
    file = ""
}


# First record of a new input file: report on the previous file (if any)
# and reset all per-file state.
file != FILENAME {
    if ( file )
        checkIt()
    file = FILENAME
    line = 0
    nFormats = 0
    nCurrencies = 0
    bFormatAuto = 0
    sReplaceFrom = ""
    sReplaceTo = ""
    sMatchReplace = ""
    sRefCurrencyFromLocale = ""
    crlf = 0
    # Drop the currency data of the previous file. The slots are filled
    # element by element, so without this a currency lacking e.g. a
    # <BankSymbol> line would silently be checked against the value the
    # previous file stored at the same index.
    delete IDs
    delete IDLine
    delete Symbols
    delete SymbolLine
    delete BankSymbols
    delete BankSymbolLine
    delete SymbolDefault
    delete SymbolCompati
}

# Every input record: classify the element on the line and collect data.
{
    ++line
    # If run under Unix a CrLf spoils the ...$ line end checks, so DOS line
    # endings are treated as a fatal error.
    if ( /\x0D$/ )
    {
        print "Error: not Unix line ending in line " line
        crlf = 1
        exit(1)
    }
    if ( $1 ~ /^<LC_FORMAT(>|$)/ )
    {
        if ( $0 ~ /replaceFrom="\[CURRENCY\]"/ )
        {
            sReplaceFrom = "\\[CURRENCY\\]"
            # Allow leading blanks, consistent with every other element
            # test in this script; otherwise indented <FormatCode> lines
            # using the placeholder would never be examined.
            sMatchReplace = "^[[:blank:]]*<FormatCode>.*" sReplaceFrom
        }
        for ( j=2; j<=NF; ++j )
        {
            # 12 == length of `replaceTo="' plus one: start of the value.
            if ( $j ~ /^replaceTo="/ )
                sReplaceTo = attrValue( $j, 12 )
        }
    }
    else if ( $1 ~ /^<FormatElement(>|$)/ )
    {
        if ( $0 ~ /usage="CURRENCY"/ )
        {
            # Format indices 12, 13, 14, 15 and 17 must use the
            # usedInCompatibleFormatCodes currency symbol.
            if ( $0 ~ /formatindex="1[23457]"/ )
                bFormatAuto = 1
            else
                bFormatAuto = 0
        }
    }
    else if ( $0 ~ /^[[:blank:]]*<FormatCode>.*\[\$.*-[0-9a-fA-F]+\]/ ||
            (sMatchReplace && $0 ~ sMatchReplace ) )
    {
        if ( sReplaceFrom )
            gsub( sReplaceFrom, sReplaceTo )
        split( $0, arr, /<|>/ )
        # arr[3] is the element content. Splitting it at `[$' and at
        # `-<hex>]' leaves the currency symbols as separate pieces.
        nCode = split( arr[3], code, /(\[\$)|(-[0-9a-fA-F]+\])/ )
        # Indexed loop instead of `for ( j in code )': the order of an
        # `in' scan is unspecified, this keeps the report in source order.
        for ( j=1; j<=nCode; ++j )
        {
            # Pieces containing `#', `0' or `[NatNum' are number format
            # parts, not currency symbols.
            if ( code[j] && code[j] !~ /[#0]|\[NatNum/ )
            {
                FormatLine[nFormats] = file " line " line
                FormatAuto[nFormats] = bFormatAuto
                Formats[nFormats++] = code[j]
            }
        }
        bFormatAuto = 0
    }
    else if ( $1 ~ /^<LC_CURRENCY(>|$)/ )
    {
        for ( j=2; j<=NF; ++j )
        {
            if ( $j ~ /^ref="/ )
            {
                # 6 == length of `ref="' plus one: start of the value.
                locale = attrValue( $j, 6 )
                sRefCurrencyFromLocale = file
                oldfile = file
                oldline = line
                # The currencies are defined in another locale file;
                # read that one instead.
                file = locale ".xml"
                line = 0
                while ( (getline <file) > 0 )
                {
                    ++line
                    getCurrencyParams()
                }
                close( file )
                if ( !line )
                    print "ref locale not available: " file \
                        " (from " oldfile " line " oldline ")"
                file = oldfile
                line = oldline
                sRefCurrencyFromLocale = ""
                # getline replaced $0 and NF with the last record of the
                # referenced file, so the fields of the <LC_CURRENCY> line
                # are gone; stop scanning.
                break
            }
        }
    }
    else
        getCurrencyParams()
}


END {
    # Report on the last file, unless processing was aborted because of a
    # CR line ending.
    if ( file && !crlf )
        checkIt()
}


# Return the value of an attribute field of the form name="value",
# optionally followed by `>' or `/>'.
#   field   the whitespace separated field holding the attribute
#   nStart  1-based position of the first character of the value
# nStrip is a local variable.
function attrValue( field, nStart,    nStrip ) {
    # Characters not belonging to the value: the nStart-1 characters in
    # front of it plus the closing quote ...
    nStrip = nStart
    # ... plus a trailing `>' ...
    if ( field ~ />$/ )
        ++nStrip
    # ... plus the `/' of a trailing `/>'.
    if ( field ~ /\/>$/ )
        ++nStrip
    return substr( field, nStart, length(field)-nStrip )
}


# Return a description of the current input position, mentioning the
# referencing file while a ref="..." locale is being read.
# sWhere is a local variable.
function currentLocation(    sWhere ) {
    sWhere = file " line " line
    if ( sRefCurrencyFromLocale )
        sWhere = sWhere " (referenced from " sRefCurrencyFromLocale ")"
    return sWhere
}


# Examine the current record ($0) for currency definition elements and
# store their data in slot nCurrencies of the currency arrays; the closing
# </Currency> tag advances nCurrencies.
function getCurrencyParams() {
    # Assumes that each element is on a line on its own!
    if ( $1 ~ /^<Currency(>|$)/ )
    {
        if ( $0 ~ /default="true"/ )
            SymbolDefault[nCurrencies] = 1
        else
            SymbolDefault[nCurrencies] = 0
        if ( $0 ~ /usedInCompatibleFormatCodes="true"/ )
            SymbolCompati[nCurrencies] = 1
        else
            SymbolCompati[nCurrencies] = 0
    }
    else if ( $0 ~ /^[[:blank:]]*<CurrencyID>/ )
    {
        # Splitting at `<' and `>' puts the element content into arr[3].
        split( $0, arr, /<|>/ )
        IDLine[nCurrencies] = currentLocation()
        IDs[nCurrencies] = arr[3]
    }
    else if ( $0 ~ /^[[:blank:]]*<CurrencySymbol>/ )
    {
        split( $0, arr, /<|>/ )
        SymbolLine[nCurrencies] = currentLocation()
        Symbols[nCurrencies] = arr[3]
    }
    else if ( $0 ~ /^[[:blank:]]*<BankSymbol>/ )
    {
        split( $0, arr, /<|>/ )
        BankSymbolLine[nCurrencies] = currentLocation()
        BankSymbols[nCurrencies] = arr[3]
    }
    else if ( $1 ~ /^<\/Currency>/ )
    {
        ++nCurrencies
    }
}


# Check the data collected for the current file and print any problems,
# followed by an empty line if there were some.
# All variables after the gap in the parameter list are locals.
function checkIt(    j, bad, state, bDef, bHasDefault, bHasCompati ) {
    bad = 0
    # Every currency symbol used in a format code has to be defined.
    for ( j=0; j<nFormats; ++j )
    {
        state = FormatInSymbol( Formats[j] )
        if ( Formats[j] == "\xc2\xa4" )
        {
            # Generic currency symbol instead of a real one.
            bad = 1
            print " bad: `" Formats[j] "' (" FormatLine[j] ")"
        }
        else if ( state == 0 )
        {
            bad = 1
            print "unknown: `" Formats[j] "' (" FormatLine[j] ")"
        }
        else if ( FormatAuto[j] && state < 2 )
        {
            # Defined, but not the compatible symbol this format needs.
            bad = 1
            print "badauto: `" Formats[j] "' (" FormatLine[j] ")"
        }
    }
    if ( bad )
    {
        # Something was wrong with the format codes, list all defined
        # currency symbols for comparison.
        for ( j=0; j<nCurrencies; ++j )
        {
            bDef = 0
            if ( Symbols[j] == "\xc2\xa4" )
                print "def bad: `" Symbols[j] "' (" SymbolLine[j] ")"
            if ( SymbolDefault[j] )
            {
                bDef = 1
                print "default: `" Symbols[j] "' (" SymbolLine[j] ")"
            }
            if ( SymbolCompati[j] )
            {
                bDef = 1
                print "compati: `" Symbols[j] "' (" SymbolLine[j] ")"
            }
            if ( !bDef )
                print "defined: `" Symbols[j] "' (" SymbolLine[j] ")"
        }
    }
    else
    {
        # Format codes are fine, check that there is exactly one default
        # and exactly one compatible currency.
        bHasDefault = 0
        bHasCompati = 0
        for ( j=0; j<nCurrencies; ++j )
        {
            if ( Symbols[j] == "\xc2\xa4" )
            {
                bad = 1
                print "def bad: `" Symbols[j] "' (" SymbolLine[j] ")"
            }
            if ( SymbolDefault[j] )
            {
                if ( !bHasDefault )
                    bHasDefault = 1
                else
                {
                    bad = 1
                    print "dupe default: `" Symbols[j] "' (" SymbolLine[j] ")"
                }
            }
            if ( SymbolCompati[j] )
            {
                if ( !bHasCompati )
                    bHasCompati = 1
                else
                {
                    bad = 1
                    print "dupe compati: `" Symbols[j] "' (" SymbolLine[j] ")"
                }
            }
        }
        if ( !bHasDefault )
        {
            bad = 1
            print " no default: (" file ")"
        }
        if ( !bHasCompati )
        {
            bad = 1
            print " no compati: (" file ")"
        }
    }
    for ( j=0; j<nCurrencies; ++j )
    {
        # Check if CurrencyID at least resembles some ISO 4217 code.
        # The only exception is zh_MO that had an erroneous original data set
        # with BankSymbol="P" (stored as ISO code in documents, hence copied to
        # CurrencyID now) and needs that entry for legacy documents.
        # There is a strange bug in gawk 3.1.4 that does a match of [A-Z] on
        # lower case except 'a', regardless of IGNORECASE setting, hence this
        # ugly notation. [[:upper:]] wouldn't be correct since we want only
        # ASCII to match.
        if ( IDs[j] !~ /^[ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ]$/ &&
                !(file == "zh_MO.xml" && IDs[j] == "P") )
        {
            bad = 1
            print "no ISO 4217 code: `" IDs[j] "' (" IDLine[j] ")"
        }
        # CurrencyID should equal BankSymbol for now.
        if ( IDs[j] != BankSymbols[j] )
        {
            bad = 1
            print "not equal: CurrencyID `" IDs[j] "' != BankSymbol `" BankSymbols[j] \
                "' (" IDLine[j] " and " BankSymbolLine[j] ")"
        }
    }
    if ( bad )
        print ""
}


# Look up a currency symbol used in a format code among the defined
# currency symbols.
#   format  the symbol to look for
# Returns 0 if the symbol is not defined, 1 if it is defined, 2 if it is
# defined by a currency flagged usedInCompatibleFormatCodes.
# state and nSym are local variables.
function FormatInSymbol( format,    state, nSym ) {
    state = 0
    for ( nSym=0; nSym<nCurrencies; ++nSym )
    {
        if ( format == Symbols[nSym] )
        {
            # Two currencies can have the same symbol (e.g. az_AZ.xml 'man.'
            # for AZM and AZN), continue to lookup if the match isn't the
            # compatible one.
            if ( SymbolCompati[nSym] )
                return 2
            else
                state = 1
        }
    }
    return state
}

# vim: ts=4 sw=4 expandtab