1#!/usr/bin/gawk -f
2# *************************************************************
3#
4#  Licensed to the Apache Software Foundation (ASF) under one
5#  or more contributor license agreements.  See the NOTICE file
6#  distributed with this work for additional information
7#  regarding copyright ownership.  The ASF licenses this file
8#  to you under the Apache License, Version 2.0 (the
9#  "License"); you may not use this file except in compliance
10#  with the License.  You may obtain a copy of the License at
11#
12#    http://www.apache.org/licenses/LICENSE-2.0
13#
14#  Unless required by applicable law or agreed to in writing,
15#  software distributed under the License is distributed on an
16#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17#  KIND, either express or implied.  See the License for the
18#  specific language governing permissions and limitations
19#  under the License.
20#
21# *************************************************************
22# Usage: gawk -f currency-check.awk *.xml
23# Check any
24# <FormatCode>...[$xxx-...]...</FormatCode>
25# against every
26# <CurrencySymbol>xxx</CurrencySymbol>
27# definition of the same XML file and output symbols if no match was found.
28# For formatindex="12" to formatindex="15" and for formatindex="17" it is
29# checked if the used currency symbol is the usedInCompatibleFormatCodes
30# currency symbol as it is needed by the number formatter.
31# Also generates output if the generic currency symbol (UTF8 string 0xC2A4)
32# is used instead of a real currency symbol.
33# Author: Eike Rathke <er@openoffice.org>
34
# Start without a current file name: the per-file rule compares `file'
# against FILENAME and only runs checkIt() when `file' is non-empty, so the
# first input file triggers initialisation but no check.
BEGIN { file = "" }
38
39
# Fires on the first record of each input file (the remembered name differs
# from FILENAME).  Reports on the file that has just been finished, if any,
# and then resets all per-file state for the new one.
file != FILENAME {
    if ( file )
        checkIt()
    file = FILENAME
    line = 0
    nFormats = 0
    nCurrencies = 0
    bFormatAuto = 0
    sReplaceFrom = ""
    sReplaceTo = ""
    sMatchReplace = ""
    sRefCurrencyFromLocale = ""
    crlf = 0
    # Resetting the counters alone is not enough: a <Currency> that lacks a
    # <CurrencyID>, <CurrencySymbol> or <BankSymbol> line never writes its
    # slot, so checkIt() would silently compare against whatever an earlier
    # file left behind at the same index.  Drop all collected data.
    delete Formats
    delete FormatLine
    delete FormatAuto
    delete SymbolDefault
    delete SymbolCompati
    delete IDs
    delete IDLine
    delete Symbols
    delete SymbolLine
    delete BankSymbols
    delete BankSymbolLine
}
54
# Per-record scanner.  Depending on the element that starts the line it
#  - remembers the [CURRENCY] replacement declared on <LC_FORMAT>,
#  - notes whether a <FormatElement> is one of the currency formats whose
#    symbol must be the usedInCompatibleFormatCodes one,
#  - collects the currency symbols used in a <FormatCode>,
#  - pulls in the currencies of another locale for <LC_CURRENCY ref="...">,
#  - or hands the line to getCurrencyParams().
{
    ++line
    # If run under Unix a CrLf spoils ...$ line end checks, so stop at the
    # first DOS line ending.  The END rule skips the check when crlf is set.
    if ( /\x0D$/ )
    {
        print "Error: not Unix line ending in line " line
        crlf = 1
        exit(1)
    }
    if ( $1 ~ /^<LC_FORMAT(>|$)/ )
    {
        if ( $0 ~ /replaceFrom="\[CURRENCY\]"/ )
        {
            sReplaceFrom = "\\[CURRENCY\\]"
            # Allow leading blanks like every other element pattern in this
            # script does; anchored at column 0 an indented <FormatCode>
            # using [CURRENCY] would never be checked.
            sMatchReplace = "^[[:blank:]]*<FormatCode>.*" sReplaceFrom
        }
        # Take the attribute value from the whole record instead of from a
        # whitespace-split field, so a value containing a blank is not cut.
        # The match covers: one blank, `replaceTo="' (11 chars), the value
        # and the closing quote.
        if ( match( $0, /[[:blank:]]replaceTo="[^"]*"/ ) )
            sReplaceTo = substr( $0, RSTART+12, RLENGTH-13 )
    }
    else if ( $1 ~ /^<FormatElement(>|$)/ )
    {
        # Only currency formats with formatindex 12-15 or 17 are subject to
        # the "must be the compatible symbol" check.  Any other element
        # clears the flag so it cannot leak from an earlier element whose
        # <FormatCode> did not match below.
        if ( $0 ~ /usage="CURRENCY"/ && $0 ~ /formatindex="1[23457]"/ )
            bFormatAuto = 1
        else
            bFormatAuto = 0
    }
    else if ( $0 ~ /^[[:blank:]]*<FormatCode>.*\[\$.*-[0-9a-fA-F]+\]/ ||
            (sMatchReplace && $0 ~ sMatchReplace ) )
    {
        if ( sReplaceFrom )
            gsub( sReplaceFrom, sReplaceTo )
        # arr[3] is the text between <FormatCode> and </FormatCode>.
        split( $0, arr, /<|>/ )
        # Cut the code at every "[$" and "-hex]" so that the currency
        # symbols become separate pieces.
        nParts = split( arr[3], code, /(\[\$)|(-[0-9a-fA-F]+\])/ )
        # Walk the pieces by index to record symbols in the order they
        # appear; for-in order is unspecified.
        for ( j=1; j<=nParts; ++j )
        {
            # Pieces containing '#', '0' or "[NatNum" are number format
            # parts, everything else non-empty is taken as a symbol.
            if ( code[j] && code[j] !~ /#|0|\[NatNum/ )
            {
                FormatLine[nFormats] = file " line " line
                FormatAuto[nFormats] = bFormatAuto
                Formats[nFormats++] = code[j]
            }
        }
        bFormatAuto = 0
    }
    else if ( $1 ~ /^<LC_CURRENCY(>|$)/ )
    {
        # The match covers: one blank, `ref="' (5 chars), the value and the
        # closing quote.
        if ( match( $0, /[[:blank:]]ref="[^"]*"/ ) )
        {
            locale = substr( $0, RSTART+6, RLENGTH-7 )
            # getline below overwrites $0 and NF; keep the current record
            # so it can be put back afterwards.
            sSavedRecord = $0
            sRefCurrencyFromLocale = file
            oldfile = file
            oldline = line
            # getCurrencyParams() reports positions using `file' and
            # `line', so point them at the referenced locale while reading.
            file = locale ".xml"
            line = 0
            while ( (getline <file) > 0 )
            {
                ++line
                getCurrencyParams()
            }
            close( file )
            if ( !line )
                print "ref locale not available: " file \
                    " (from " oldfile " line " oldline ")"
            file = oldfile
            line = oldline
            sRefCurrencyFromLocale = ""
            $0 = sSavedRecord
        }
    }
    else
        getCurrencyParams()
}
148
149
# Check the last file read.  Nothing is checked when no file was seen at all
# or when processing was aborted because of a DOS line ending.
END {
    if ( !crlf )
    {
        if ( file )
            checkIt()
    }
}
154
155
# Evaluate the current record ($0) as part of a <Currency> element and store
# what it provides in slot nCurrencies of the currency arrays:
#   <Currency ...>     -> SymbolDefault[], SymbolCompati[] (1 or 0)
#   <CurrencyID>       -> IDs[], IDLine[]
#   <CurrencySymbol>   -> Symbols[], SymbolLine[]
#   <BankSymbol>       -> BankSymbols[], BankSymbolLine[]
#   </Currency>        -> advances nCurrencies to the next slot
# The *Line[] entries hold "file line N", extended by the referencing file
# while sRefCurrencyFromLocale is set.  Any other record is ignored.
function getCurrencyParams() {
    # Assumes that each element is on a line on its own!
    if ( $1 ~ /^<Currency(>|$)/ )
    {
        SymbolDefault[nCurrencies] = ( $0 ~ /default="true"/ ) ? 1 : 0
        SymbolCompati[nCurrencies] = \
            ( $0 ~ /usedInCompatibleFormatCodes="true"/ ) ? 1 : 0
        return
    }
    if ( $0 ~ /^[[:blank:]]*<CurrencyID>/ )
    {
        # After the split arr[3] is the element's text content.
        split( $0, arr, /<|>/ )
        IDs[nCurrencies] = arr[3]
        IDLine[nCurrencies] = file " line " line
        if ( sRefCurrencyFromLocale )
            IDLine[nCurrencies] = IDLine[nCurrencies] \
                " (referenced from " sRefCurrencyFromLocale ")"
        return
    }
    if ( $0 ~ /^[[:blank:]]*<CurrencySymbol>/ )
    {
        split( $0, arr, /<|>/ )
        Symbols[nCurrencies] = arr[3]
        SymbolLine[nCurrencies] = file " line " line
        if ( sRefCurrencyFromLocale )
            SymbolLine[nCurrencies] = SymbolLine[nCurrencies] \
                " (referenced from " sRefCurrencyFromLocale ")"
        return
    }
    if ( $0 ~ /^[[:blank:]]*<BankSymbol>/ )
    {
        split( $0, arr, /<|>/ )
        BankSymbols[nCurrencies] = arr[3]
        BankSymbolLine[nCurrencies] = file " line " line
        if ( sRefCurrencyFromLocale )
            BankSymbolLine[nCurrencies] = BankSymbolLine[nCurrencies] \
                " (referenced from " sRefCurrencyFromLocale ")"
        return
    }
    if ( $1 ~ /^<\/Currency>/ )
        ++nCurrencies
}
204
205
# Print a report for the data collected from the current `file'.
#
# 1. Every symbol found in a format code (Formats[]) is looked up with
#    FormatInSymbol(); the generic currency sign, unknown symbols and
#    symbols that are not the compatible one in an "auto" format are listed.
# 2. If that produced findings, all defined currencies are listed for
#    reference; otherwise the definitions themselves are checked for exactly
#    one default and exactly one compatible currency.
# 3. Every CurrencyID has to look like an ISO 4217 code and equal the
#    BankSymbol.
# A blank line terminates the report if anything was printed as a finding.
#
# Called without arguments; the names in the parameter list are only there
# to make the working variables local to this function.
function checkIt(    j, bad, state, bDef, bHasDefault, bHasCompati ) {
    bad = 0
    for ( j=0; j<nFormats; ++j )
    {
        state = FormatInSymbol( Formats[j] )
        # "\xc2\xa4" is the UTF-8 encoded generic currency sign.
        if ( Formats[j] == "\xc2\xa4" )
        {
            bad = 1
            print "    bad: `" Formats[j] "'   (" FormatLine[j] ")"
        }
        else if ( state == 0 )
        {
            bad = 1
            print "unknown: `" Formats[j] "'   (" FormatLine[j] ")"
        }
        else if ( FormatAuto[j] && state < 2 )
        {
            # Known symbol, but not the usedInCompatibleFormatCodes one.
            bad = 1
            print "badauto: `" Formats[j] "'   (" FormatLine[j] ")"
        }
    }
    if ( bad )
    {
        # List what is defined so the findings above can be compared to it.
        for ( j=0; j<nCurrencies; ++j )
        {
            bDef = 0
            if ( Symbols[j] == "\xc2\xa4" )
                print "def bad: `" Symbols[j] "'   (" SymbolLine[j] ")"
            if ( SymbolDefault[j] )
            {
                bDef = 1
                print "default: `" Symbols[j] "'   (" SymbolLine[j] ")"
            }
            if ( SymbolCompati[j] )
            {
                bDef = 1
                print "compati: `" Symbols[j] "'   (" SymbolLine[j] ")"
            }
            if ( !bDef )
                print "defined: `" Symbols[j] "'   (" SymbolLine[j] ")"
        }
    }
    else
    {
        bHasDefault = 0
        bHasCompati = 0
        for ( j=0; j<nCurrencies; ++j )
        {
            if ( Symbols[j] == "\xc2\xa4" )
            {
                bad = 1
                print "def bad: `" Symbols[j] "'   (" SymbolLine[j] ")"
            }
            if ( SymbolDefault[j] )
            {
                if ( !bHasDefault )
                    bHasDefault = 1
                else
                {
                    bad = 1
                    print "dupe default: `" Symbols[j] "'   (" SymbolLine[j] ")"
                }
            }
            if ( SymbolCompati[j] )
            {
                if ( !bHasCompati )
                    bHasCompati = 1
                else
                {
                    bad = 1
                    print "dupe compati: `" Symbols[j] "'   (" SymbolLine[j] ")"
                }
            }
        }
        if ( !bHasDefault )
        {
            bad = 1
            print "  no default: (" file ")"
        }
        if ( !bHasCompati )
        {
            bad = 1
            print "  no compati: (" file ")"
        }
    }
    for ( j=0; j<nCurrencies; ++j )
    {
        # Check if CurrencyID at least resembles some ISO 4217 code.
        # The only exception is zh_MO that had an erroneous original data set
        # with BankSymbol="P" (stored as ISO code in documents, hence copied to
        # CurrencyID now) and needs that entry for legacy documents.
        # The exemption is matched on the file's base name so that it also
        # holds when the file is given with a directory (./zh_MO.xml).
        # There is a strange bug in gawk 3.1.4 that does a match of [A-Z] on
        # lower case except 'a', regardless of IGNORECASE setting, hence this
        # ugly notation. [[:upper:]] wouldn't be correct since we want only
        # ASCII to match.
        if ( IDs[j] !~ /^[ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ]$/ \
              && !(file ~ /(^|\/)zh_MO\.xml$/ && IDs[j] == "P") )
        {
            bad = 1
            print "no ISO 4217 code: `" IDs[j] "'   (" IDLine[j] ")"
        }
        # CurrencyID should equal BankSymbol for now.
        if ( IDs[j] != BankSymbols[j] )
        {
            bad = 1
            print "not equal: CurrencyID `" IDs[j] "' != BankSymbol `" BankSymbols[j] \
                  "'   (" IDLine[j] " and " BankSymbolLine[j] ")"
        }
    }
    if ( bad )
        print ""
}
318
319
# Look up a symbol taken from a format code among the defined currencies
# (Symbols[0 .. nCurrencies-1]).
#
#   format   the symbol to look for
#
# Returns 0 if no currency uses the symbol,
#         1 if it is defined but by no usedInCompatibleFormatCodes currency,
#         2 if the usedInCompatibleFormatCodes currency uses it.
#
# `state' and `nSym' in the parameter list are local working variables, not
# arguments; without them the function would overwrite global variables of
# the same name.
function FormatInSymbol( format,    state, nSym ) {
    state = 0
    for ( nSym=0; nSym<nCurrencies; ++nSym )
    {
        if ( format == Symbols[nSym] )
        {
            # Two currencies can have the same symbol (e.g. az_AZ.xml 'man.'
            # for AZM and AZN), continue to lookup if the match isn't the
            # compatible one.
            if ( SymbolCompati[nSym] )
                return 2
            else
                state = 1
        }
    }
    return state
}
337
338# vim: ts=4 sw=4 expandtab
339