xref: /aoo41x/main/l10ntools/source/gsicheck.cxx (revision cdf0e10c)
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_l10ntools.hxx"
30 #include <stdio.h>
31 #include <tools/fsys.hxx>
32 #include <tools/stream.hxx>
33 #include <tools/list.hxx>
34 
35 // local includes
36 #include "tagtest.hxx"
37 #include "gsicheck.hxx"
38 
39 #define MAX_GID_LID_LEN 250
40 
41 /*****************************************************************************/
42 void PrintMessage( ByteString aType, ByteString aMsg, ByteString aPrefix,
43 	ByteString aContext, sal_Bool bPrintContext, sal_uLong nLine, ByteString aUniqueId = ByteString() )
44 /*****************************************************************************/
45 {
46 	fprintf( stdout, "%s %s, Line %lu", aType.GetBuffer(), aPrefix.GetBuffer(), nLine );
47 	if ( aUniqueId.Len() )
48 		fprintf( stdout, ", UniqueID %s", aUniqueId.GetBuffer() );
49 	fprintf( stdout, ": %s", aMsg.GetBuffer() );
50 
51 	if ( bPrintContext )
52 		fprintf( stdout, "  \"%s\"", aContext.GetBuffer() );
53 	fprintf( stdout, "\n" );
54 }
55 
56 /*****************************************************************************/
57 void PrintError( ByteString aMsg, ByteString aPrefix,
58 	ByteString aContext, sal_Bool bPrintContext, sal_uLong nLine, ByteString aUniqueId = ByteString() )
59 /*****************************************************************************/
60 {
61     PrintMessage( "Error:", aMsg, aPrefix, aContext, bPrintContext, nLine, aUniqueId );
62 }
63 
64 sal_Bool LanguageOK( ByteString aLang )
65 {
66     if ( !aLang.Len() )
67         return sal_False;
68 
69     if ( aLang.IsNumericAscii() )
70         return sal_True;
71 
72     if ( aLang.GetTokenCount( '-' ) == 1 )
73         return aLang.IsAlphaAscii() && aLang.IsLowerAscii();
74     else if ( aLang.GetTokenCount( '-' ) == 2 )
75     {
76         ByteString aTok0( aLang.GetToken( 0, '-' ) );
77         ByteString aTok1( aLang.GetToken( 1, '-' ) );
78         return  aTok0.Len() && aTok0.IsAlphaAscii() && aTok0.IsLowerAscii()
79              && aTok1.Len() && aTok1.IsAlphaAscii() && aTok1.IsUpperAscii()
80              && !aTok1.EqualsIgnoreCaseAscii( aTok0 );
81     }
82 
83     return sal_False;
84 }
85 
86 
87 //
88 // class LazySvFileStream
89 //
90 
91 
92 class LazySvFileStream : public SvFileStream
93 {
94 
95 private:
96     String aFileName;
97     sal_Bool bOpened;
98     StreamMode eOpenMode;
99 
100 public:
101     LazySvFileStream()
102     : aFileName()
103     , bOpened( sal_False )
104     , eOpenMode( 0 )
105     {};
106 
107     void SetOpenParams( const String& rFileName, StreamMode eOpenModeP )
108     {
109         aFileName = rFileName;
110         eOpenMode = eOpenModeP;
111     };
112 
113     void LazyOpen();
114 };
115 
116 void LazySvFileStream::LazyOpen()
117 {
118     if ( !bOpened )
119     {
120         Open( aFileName, eOpenMode );
121 	    if ( !IsOpen())
122 	    {
123 		    fprintf( stderr, "\nERROR: Could not open Output-File %s!\n\n", ByteString( aFileName, RTL_TEXTENCODING_ASCII_US ).GetBuffer() );
124 		    exit ( 4 );
125 	    }
126         bOpened = sal_True;
127     }
128 }
129 
130 
131 //
132 // class GSILine
133 //
134 
135 /*****************************************************************************/
136 GSILine::GSILine( const ByteString &rLine, sal_uLong nLine )
137 /*****************************************************************************/
138 				: ByteString( rLine )
139 				, nLineNumber( nLine )
140 				, bOK( sal_True )
141                 , bFixed ( sal_False )
142 {
143     if ( rLine.GetTokenCount( '\t' ) == 15 )
144     {
145         aFormat = FORMAT_SDF;
146         aUniqId = rLine.GetToken( 0, '\t' );
147         aUniqId.Append("/").Append( rLine.GetToken( 1, '\t' ) ).Append("/").Append( rLine.GetToken( 3, '\t' ) ).Append("/").Append( rLine.GetToken( 4, '\t' ) ).Append("/").Append( rLine.GetToken( 5, '\t' ) ).Append("/").Append( rLine.GetToken( 6, '\t' ) ).Append("/").Append( rLine.GetToken( 7, '\t' ) );
148         aLineType = "";
149         aLangId = rLine.GetToken( 9, '\t' );
150         aText = rLine.GetToken( 10, '\t' );
151         aQuickHelpText = rLine.GetToken( 12, '\t' );
152         aTitle = rLine.GetToken( 13, '\t' );
153 
154         // do some more format checks here
155         if ( !rLine.GetToken( 8, '\t' ).IsNumericAscii() )
156         {
157 		    PrintError( "The length field does not contain a number!", "Line format", rLine.GetToken( 8, '\t' ), sal_True, GetLineNumber(), GetUniqId() );
158 		    NotOK();
159         }
160         if ( !LanguageOK( aLangId ) )
161         {
162 		    PrintError( "The Language is invalid!", "Line format", aLangId, sal_True, GetLineNumber(), GetUniqId() );
163 		    NotOK();
164         }
165         // limit GID and LID to MAX_GID_LID_LEN chars each for database conformity, see #137575#
166         if ( rLine.GetToken( 4, '\t' ).Len() > MAX_GID_LID_LEN || rLine.GetToken( 5, '\t' ).Len() > MAX_GID_LID_LEN )
167         {
168 			PrintError( ByteString("GID and LID may only be ").Append( ByteString::CreateFromInt32(MAX_GID_LID_LEN) ).Append( " chars long each!" ), "Line format", aLangId, sal_True, GetLineNumber(), GetUniqId() );
169 		    NotOK();
170         }
171     }
172     else    // allow tabs in gsi files
173     {
174         aFormat = FORMAT_GSI;
175         ByteString sTmp( rLine );
176         sal_uInt16 nPos = sTmp.Search( "($$)" );
177         sal_uInt16 nStart = 0;
178         if ( nPos != STRING_NOTFOUND )
179         {
180         	aUniqId = sTmp.Copy( nStart, nPos - nStart );
181             nStart = nPos + 4;  // + length of the delemiter
182             nPos = sTmp.Search( "($$)", nStart );
183         }
184         if ( nPos != STRING_NOTFOUND )
185         {
186         	aLineType = sTmp.Copy( nStart, nPos - nStart );
187             nStart = nPos + 4;  // + length of the delemiter
188             nPos = sTmp.Search( "($$)", nStart );
189             aUniqId.Append( "/" );
190             aUniqId.Append( aLineType );
191         }
192         if ( nPos != STRING_NOTFOUND )
193         {
194         	aLangId = sTmp.Copy( nStart, nPos - nStart );
195             nStart = nPos + 4;  // + length of the delemiter
196             nPos = sTmp.Search( "($$)", nStart );
197         }
198         if ( nPos != STRING_NOTFOUND )
199         {
200 //        	ByteString aStatus = sTmp.Copy( nStart, nPos - nStart );     // ext int ...
201             nStart = nPos + 4;  // + length of the delemiter
202         }
203         if ( nPos != STRING_NOTFOUND )
204         	aText = sTmp.Copy( nStart );
205         else
206             aFormat = FORMAT_UNKNOWN;
207     }
208 
209     if ( FORMAT_UNKNOWN == GetLineFormat() )
210         NotOK();
211 }
212 
213 /*****************************************************************************/
214 void GSILine::NotOK()
215 /*****************************************************************************/
216 {
217     bOK = sal_False;
218 }
219 
220 /*****************************************************************************/
221 void GSILine::ReassembleLine()
222 /*****************************************************************************/
223 {
224     ByteString aReassemble;
225     if ( GetLineFormat() == FORMAT_SDF )
226     {
227         sal_uInt16 i;
228         for ( i = 0 ; i < 10 ; i++ )
229         {
230             aReassemble.Append( GetToken( i, '\t' ) );
231             aReassemble.Append( "\t" );
232         }
233         aReassemble.Append( aText );
234         aReassemble.Append( "\t" );
235         aReassemble.Append( GetToken( 11, '\t' ) ); // should be empty but there are some places in sc. Not reflected to sources!!
236         aReassemble.Append( "\t" );
237         aReassemble.Append( aQuickHelpText );
238         aReassemble.Append( "\t" );
239         aReassemble.Append( aTitle );
240         for ( i = 14 ; i < 15 ; i++ )
241         {
242             aReassemble.Append( "\t" );
243             aReassemble.Append( GetToken( i, '\t' ) );
244         }
245         *(ByteString*)this = aReassemble;
246     }
247     else if ( GetLineFormat() == FORMAT_GSI )
248     {
249         sal_uInt16 nPos = Search( "($$)" );
250         sal_uInt16 nStart = 0;
251         if ( nPos != STRING_NOTFOUND )
252         {
253             nStart = nPos + 4;  // + length of the delemiter
254             nPos = Search( "($$)", nStart );
255         }
256         if ( nPos != STRING_NOTFOUND )
257         {
258             nStart = nPos + 4;  // + length of the delemiter
259             nPos = Search( "($$)", nStart );
260         }
261         if ( nPos != STRING_NOTFOUND )
262         {
263             nStart = nPos + 4;  // + length of the delemiter
264             nPos = Search( "($$)", nStart );
265         }
266         if ( nPos != STRING_NOTFOUND )
267         {
268             nStart = nPos + 4;  // + length of the delemiter
269         }
270         if ( nPos != STRING_NOTFOUND )
271         {
272             aReassemble = Copy( 0, nStart );
273             aReassemble += aText;
274             *(ByteString*)this = aReassemble;
275         }
276         else
277             PrintError( "Cannot reassemble GSI line (internal Error).", "Line format", "", sal_False, GetLineNumber(), GetUniqId() );
278     }
279     else
280         PrintError( "Cannot reassemble line of unknown type (internal Error).", "Line format", "", sal_False, GetLineNumber(), GetUniqId() );
281 }
282 
283 //
284 // class GSIBlock
285 //
286 /*****************************************************************************/
287 GSIBlock::GSIBlock( sal_Bool PbPrintContext, sal_Bool bSource, sal_Bool bTrans, sal_Bool bRef, sal_Bool bAllowKID, sal_Bool bAllowSusp )
288 /*****************************************************************************/
289             : pSourceLine( NULL )
290             , pReferenceLine( NULL )
291             , bPrintContext( PbPrintContext )
292             , bCheckSourceLang( bSource )
293             , bCheckTranslationLang( bTrans )
294             , bReference( bRef )
295             , bAllowKeyIDs( bAllowKID )
296             , bAllowSuspicious( bAllowSusp )
297             , bHasBlockError( sal_False )
298 {
299 }
300 
301 /*****************************************************************************/
302 GSIBlock::~GSIBlock()
303 /*****************************************************************************/
304 {
305 	delete pSourceLine;
306 	delete pReferenceLine;
307 
308 	for ( sal_uLong i = 0; i < Count(); i++ )
309 		delete ( GetObject( i ));
310 }
311 
312 /*****************************************************************************/
313 void GSIBlock::InsertLine( GSILine* pLine, ByteString aSourceLang)
314 /*****************************************************************************/
315 {
316 	if ( pLine->GetLanguageId().Equals( aSourceLang ) )
317     {
318         if ( pSourceLine )
319         {
320             PrintError( "Source Language entry double. Treating as Translation.", "File format", "", pLine->GetLineNumber(), pLine->GetUniqId() );
321             bHasBlockError = sal_True;
322             pSourceLine->NotOK();
323             pLine->NotOK();
324         }
325         else
326         {
327 		    pSourceLine = pLine;
328             return;
329         }
330     }
331 	sal_uLong nPos = 0;
332 
333     if ( aSourceLang.Len() ) // only check blockstructure if source lang is given
334     {
335 		while ( nPos < Count() )
336         {
337             if ( GetObject( nPos )->GetLanguageId().Equals( pLine->GetLanguageId() ) )
338             {
339                 PrintError( "Translation Language entry double. Checking both.", "File format", "", pLine->GetLineNumber(), pLine->GetUniqId() );
340                 bHasBlockError = sal_True;
341                 GetObject( nPos )->NotOK();
342                 pLine->NotOK();
343             }
344 			nPos++;
345         }
346     }
347 	Insert( pLine, LIST_APPEND );
348 }
349 
350 /*****************************************************************************/
351 void GSIBlock::SetReferenceLine( GSILine* pLine )
352 /*****************************************************************************/
353 {
354     pReferenceLine = pLine;
355 }
356 
357 /*****************************************************************************/
358 void GSIBlock::PrintMessage( ByteString aType, ByteString aMsg, ByteString aPrefix,
359 	ByteString aContext, sal_uLong nLine, ByteString aUniqueId )
360 /*****************************************************************************/
361 {
362     ::PrintMessage( aType, aMsg, aPrefix, aContext, bPrintContext, nLine, aUniqueId );
363 }
364 
365 /*****************************************************************************/
366 void GSIBlock::PrintError( ByteString aMsg, ByteString aPrefix,
367 	ByteString aContext, sal_uLong nLine, ByteString aUniqueId )
368 /*****************************************************************************/
369 {
370     PrintMessage( "Error:", aMsg, aPrefix, aContext, nLine, aUniqueId );
371 }
372 
373 /*****************************************************************************/
374 void GSIBlock::PrintList( ParserMessageList *pList, ByteString aPrefix,
375 	GSILine *pLine )
376 /*****************************************************************************/
377 {
378 	sal_uLong i;
379 	for ( i = 0 ; i < pList->Count() ; i++ )
380 	{
381 		ParserMessage *pMsg = pList->GetObject( i );
382 		ByteString aContext;
383 		if ( bPrintContext )
384 		{
385 			if ( pMsg->GetTagBegin() == STRING_NOTFOUND )
386 				aContext = pLine->GetText().Copy( 0, 300 );
387 			else
388 				aContext = pLine->Copy( pMsg->GetTagBegin()-150, 300 );
389 			aContext.EraseTrailingChars(' ');
390 			aContext.EraseLeadingChars(' ');
391 		}
392 
393         PrintMessage( pMsg->Prefix(), pMsg->GetErrorText(), aPrefix, aContext, pLine->GetLineNumber(), pLine->GetUniqId() );
394 	}
395 }
396 
397 /*****************************************************************************/
398 sal_Bool GSIBlock::IsUTF8( const ByteString &aTestee, sal_Bool bFixTags, sal_uInt16 &nErrorPos, ByteString &aErrorMsg, sal_Bool &bHasBeenFixed, ByteString &aFixed ) const
399 /*****************************************************************************/
400 {
401     String aUTF8Tester( aTestee, RTL_TEXTENCODING_UTF8 );
402     if ( STRING_MATCH != (nErrorPos = ByteString( aUTF8Tester, RTL_TEXTENCODING_UTF8 ).Match( aTestee )) )
403     {
404         aUTF8Tester = String( aTestee.GetBuffer(), nErrorPos, RTL_TEXTENCODING_UTF8 );
405         nErrorPos = aUTF8Tester.Len();
406         aErrorMsg = ByteString( "UTF8 Encoding seems to be broken" );
407         return sal_False;
408     }
409 
410     nErrorPos = aUTF8Tester.SearchChar( String::CreateFromAscii( "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0b\x0c\x0e\x0f"
411                 "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x7f" ).GetBuffer() );
412     if ( nErrorPos != STRING_NOTFOUND )
413     {
414         aErrorMsg = ByteString( "String contains illegal character" );
415         return sal_False;
416     }
417 
418     if ( bFixTags )
419     {
420         bHasBeenFixed = sal_False;
421         aFixed.Erase();
422     }
423 
424     if ( !bAllowKeyIDs )
425     {
426         sal_Bool bIsKeyID = sal_False;
427         sal_Bool bNewId = sal_False;
428         ByteString aID( aTestee );
429 		sal_uInt16 nAfterID = 0;
430 
431 		if ( aTestee.Equals( "{&", 0, 2 ) )
432         {   // check for strings from instset_native like "{&Tahoma8}335795.Installation Wiza ..."
433             sal_uInt16 nTagEnd = aTestee.Search( '}' );
434             if ( nTagEnd != STRING_NOTFOUND )
435             {
436                 if ( bFixTags )
437                     aFixed = aTestee.Copy( 0, nTagEnd+1 );
438                 nErrorPos = nTagEnd+1;
439                 aID = aTestee.Copy( nTagEnd+1 );
440 				nAfterID = nTagEnd+1;
441             }
442         }
443 
444 		ByteString aDelimiter( (String)String( sal_Unicode(0x2016) ), RTL_TEXTENCODING_UTF8 );
445 
446         if ( aID.Equals( aDelimiter, 6, aDelimiter.Len() ) )
447         {   // New KeyId     6 Letters, digits and spechial chars followed by delimiter
448             bNewId = sal_True;
449             nErrorPos = 1;
450             aID = aID.Copy( 0, 6 );
451 			nAfterID += 6;
452 			nAfterID = nAfterID + aDelimiter.Len();
453         }
454         else if ( ( aID.GetChar(6) == '*' ) && aID.Equals( aDelimiter, 7, aDelimiter.Len() ) )
455         {   // New KeyId     6 Letters, digits and spechial chars followed by '*delimiter' to indicate translation in progress
456             bNewId = sal_True;
457             nErrorPos = 1;
458             aID = aID.Copy( 0, 6 );
459 			nAfterID += 7;
460 			nAfterID = nAfterID + aDelimiter.Len();
461         }
462         else if ( aID.GetTokenCount( '.' ) > 1 )
463         {	// test for old KeyIDs       5 to 6 digits followed by a dot   '44373.'
464             bNewId = sal_False;
465             nErrorPos = 1;
466             aID = aID.GetToken( 0, '.' );
467 			nAfterID = nAfterID + aID.Len();
468         }
469 		else
470 		{
471 			aID.Erase();
472 		}
473 
474         if ( bNewId )
475             {
476                 if ( aID.Len() == 6 )
477                 {
478                     bIsKeyID = sal_True;
479                     ByteString aDigits("0123456789abcdefghijklmnopqrstuvwxyz+-<=>");
480                     for ( sal_uInt16 i=0 ; i < aID.Len() ;i++ )
481                     {
482                         if ( aDigits.Search( aID.GetChar(i) ) == STRING_NOTFOUND )
483                             bIsKeyID = sal_False;
484                     }
485                 }
486             }
487         else
488         {
489             if ( aID.Len() > 0 && aID.GetChar(aID.Len()-1) == '*' )
490                 aID.Erase( aID.Len()-1 );
491 
492             if ( aID.IsNumericAscii() && aID.Len() >= 5 )
493                 bIsKeyID = sal_True;
494         }
495 
496         if ( bIsKeyID )
497         {
498             aErrorMsg = ByteString( "String contains KeyID" );
499             if ( bFixTags )
500             {
501                 aFixed += aTestee.Copy( nAfterID );
502                 bHasBeenFixed = sal_True;
503                 aErrorMsg = ByteString( "FIXED String containing KeyID" );
504             }
505             else
506                 aErrorMsg = ByteString( "String contains KeyID" );
507             return sal_False;
508         }
509     }
510 
511     return sal_True;
512 }
513 
514 /*****************************************************************************/
515 sal_Bool GSIBlock::TestUTF8( GSILine* pTestee, sal_Bool bFixTags )
516 /*****************************************************************************/
517 {
518     sal_uInt16 nErrorPos = 0;
519     ByteString aErrorMsg;
520     sal_Bool bError = sal_False;
521     ByteString aFixed;
522     sal_Bool bHasBeenFixed = sal_False;
523     if ( !IsUTF8( pTestee->GetText(), bFixTags, nErrorPos, aErrorMsg, bHasBeenFixed, aFixed ) )
524     {
525         ByteString aContext( pTestee->GetText().Copy( nErrorPos, 20 ) );
526         PrintError( aErrorMsg.Append(" in Text at Position " ).Append( ByteString::CreateFromInt32( nErrorPos ) ), "Text format", aContext, pTestee->GetLineNumber(), pTestee->GetUniqId() );
527         bError = sal_True;
528         if ( bHasBeenFixed )
529         {
530             pTestee->SetText( aFixed );
531             pTestee->SetFixed();
532         }
533     }
534     if ( !IsUTF8( pTestee->GetQuickHelpText(), bFixTags, nErrorPos, aErrorMsg, bHasBeenFixed, aFixed ) )
535     {
536         ByteString aContext( pTestee->GetQuickHelpText().Copy( nErrorPos, 20 ) );
537         PrintError( aErrorMsg.Append(" in QuickHelpText at Position " ).Append( ByteString::CreateFromInt32( nErrorPos ) ), "Text format", aContext, pTestee->GetLineNumber(), pTestee->GetUniqId() );
538         bError = sal_True;
539         if ( bHasBeenFixed )
540         {
541             pTestee->SetQuickHelpText( aFixed );
542             pTestee->SetFixed();
543         }
544     }
545     if ( !IsUTF8( pTestee->GetTitle(), bFixTags, nErrorPos, aErrorMsg, bHasBeenFixed, aFixed ) )
546     {
547         ByteString aContext( pTestee->GetTitle().Copy( nErrorPos, 20 ) );
548         PrintError( aErrorMsg.Append(" in Title at Position " ).Append( ByteString::CreateFromInt32( nErrorPos ) ), "Text format", aContext, pTestee->GetLineNumber(), pTestee->GetUniqId() );
549         bError = sal_True;
550         if ( bHasBeenFixed )
551         {
552             pTestee->SetTitle( aFixed );
553             pTestee->SetFixed();
554         }
555     }
556     if ( bError )
557         pTestee->NotOK();
558     return !bError;
559 }
560 
561 
562 /*****************************************************************************/
563 sal_Bool GSIBlock::HasSuspiciousChars( GSILine* pTestee, GSILine* pSource )
564 /*****************************************************************************/
565 {
566     sal_uInt16 nPos = 0;
567     if ( !bAllowSuspicious && ( nPos = pTestee->GetText().Search("??")) != STRING_NOTFOUND )
568         if ( pSource->GetText().Search("??") == STRING_NOTFOUND )
569         {
570             String aUTF8Tester = String( pTestee->GetText(), 0, nPos, RTL_TEXTENCODING_UTF8 );
571             sal_uInt16 nErrorPos = aUTF8Tester.Len();
572             ByteString aContext( pTestee->GetText().Copy( nPos, 20 ) );
573             PrintError( ByteString("Found double questionmark in translation only. Looks like an encoding problem at Position " ).Append( ByteString::CreateFromInt32( nErrorPos ) ), "Text format", aContext, pTestee->GetLineNumber(), pTestee->GetUniqId() );
574             pTestee->NotOK();
575             return sal_True;
576         }
577 
578     return sal_False;
579 }
580 
581 
582 /*****************************************************************************/
583 sal_Bool GSIBlock::CheckSyntax( sal_uLong nLine, sal_Bool bRequireSourceLine, sal_Bool bFixTags )
584 /*****************************************************************************/
585 {
586 	static LingTest aTester;
587     sal_Bool bHasError = sal_False;
588 
589 	if ( !pSourceLine )
590 	{
591         if ( bRequireSourceLine )
592         {
593     		PrintError( "No source language entry defined!", "File format", "", nLine );
594             bHasBlockError = sal_True;
595         }
596 	}
597 	else
598 	{
599         aTester.CheckReference( pSourceLine );
600         if ( pSourceLine->HasMessages() )
601 		{
602 			PrintList( pSourceLine->GetMessageList(), "ReferenceString", pSourceLine );
603 			pSourceLine->NotOK();
604             bHasError = sal_True;
605 		}
606 	}
607     if ( bReference )
608     {
609         if ( !pReferenceLine )
610         {
611             GSILine *pSource;
612             if ( pSourceLine )
613                 pSource = pSourceLine;
614             else
615                 pSource = GetObject( 0 );   // get some other line
616             if ( pSource )
617                 PrintError( "No reference line found. Entry is new in source file", "File format", "", pSource->GetLineNumber(), pSource->GetUniqId() );
618             else
619                 PrintError( "No reference line found. Entry is new in source file", "File format", "", nLine );
620             bHasBlockError = sal_True;
621 	    }
622 	    else
623 	    {
624 		    if ( pSourceLine && !pSourceLine->Equals( *pReferenceLine ) )
625 		    {
626                 xub_StrLen nPos = pSourceLine->Match( *pReferenceLine );
627                 ByteString aContext( pReferenceLine->Copy( nPos - 5, 15) );
628                 aContext.Append( "\" --> \"" ).Append( pSourceLine->Copy( nPos - 5, 15) );
629                 PrintError( "Source Language Entry has changed.", "File format", aContext, pSourceLine->GetLineNumber(), pSourceLine->GetUniqId() );
630 			    pSourceLine->NotOK();
631                 bHasError = sal_True;
632 		    }
633 	    }
634     }
635 
636     if ( pSourceLine )
637         bHasError |= !TestUTF8( pSourceLine, bFixTags );
638 
639 	sal_uLong i;
640 	for ( i = 0; i < Count(); i++ )
641 	{
642 		aTester.CheckTestee( GetObject( i ), pSourceLine != NULL, bFixTags );
643 		if ( GetObject( i )->HasMessages() || aTester.HasCompareWarnings() )
644         {
645             if ( GetObject( i )->HasMessages() || aTester.GetCompareWarnings().HasErrors() )
646 			    GetObject( i )->NotOK();
647             bHasError = sal_True;
648 			PrintList( GetObject( i )->GetMessageList(), "Translation", GetObject( i ) );
649 			PrintList( &(aTester.GetCompareWarnings()), "Translation Tag Missmatch", GetObject( i ) );
650 		}
651         bHasError |= !TestUTF8( GetObject( i ), bFixTags );
652         if ( pSourceLine )
653             bHasError |= HasSuspiciousChars( GetObject( i ), pSourceLine );
654 	}
655 
656 	return bHasError || bHasBlockError;
657 }
658 
659 void GSIBlock::WriteError( LazySvFileStream &aErrOut, sal_Bool bRequireSourceLine  )
660 {
661     if ( pSourceLine && pSourceLine->IsOK() && bCheckSourceLang && !bHasBlockError )
662         return;
663 
664 	sal_Bool bHasError = sal_False;
665 	sal_Bool bCopyAll = ( !pSourceLine && bRequireSourceLine ) || ( pSourceLine && !pSourceLine->IsOK() && !bCheckTranslationLang ) || bHasBlockError;
666 	sal_uLong i;
667 	for ( i = 0; i < Count(); i++ )
668 	{
669 		if ( !GetObject( i )->IsOK() || bCopyAll )
670 		{
671 			bHasError = sal_True;
672             aErrOut.LazyOpen();
673 			aErrOut.WriteLine( *GetObject( i ) );
674 		}
675 	}
676 
677 	if ( pSourceLine && ( bHasError || !pSourceLine->IsOK() ) && !( !bHasError && bCheckTranslationLang ) )
678     {
679         aErrOut.LazyOpen();
680 		aErrOut.WriteLine( *pSourceLine );
681     }
682 }
683 
684 void GSIBlock::WriteCorrect( LazySvFileStream &aOkOut, sal_Bool bRequireSourceLine )
685 {
686 	if ( ( !pSourceLine && bRequireSourceLine ) || ( pSourceLine && !pSourceLine->IsOK() && !bCheckTranslationLang ) )
687 		return;
688 
689 	sal_Bool bHasOK = sal_False;
690 	sal_uLong i;
691 	for ( i = 0; i < Count(); i++ )
692 	{
693 		if ( ( GetObject( i )->IsOK() || bCheckSourceLang ) && !bHasBlockError )
694 		{
695 			bHasOK = sal_True;
696             aOkOut.LazyOpen();
697 			aOkOut.WriteLine( *GetObject( i ) );
698 		}
699 	}
700 
701 	if ( ( pSourceLine && pSourceLine->IsOK() && ( Count() || !bCheckTranslationLang ) ) || ( bHasOK && bCheckTranslationLang ) )
702     {
703         aOkOut.LazyOpen();
704 		aOkOut.WriteLine( *pSourceLine );
705     }
706 }
707 
708 void GSIBlock::WriteFixed( LazySvFileStream &aFixOut, sal_Bool /*bRequireSourceLine*/ )
709 {
710     if ( pSourceLine && !pSourceLine->IsFixed() && bCheckSourceLang )
711         return;
712 
713 	sal_Bool bHasFixes = sal_False;
714 	sal_uLong i;
715 	for ( i = 0; i < Count(); i++ )
716 	{
717 		if ( GetObject( i )->IsFixed() )
718 		{
719 			bHasFixes = sal_True;
720             aFixOut.LazyOpen();
721 			aFixOut.WriteLine( *GetObject( i ) );
722 		}
723 	}
724 
725 	if ( pSourceLine && ( bHasFixes || pSourceLine->IsFixed() ) )
726     {
727         aFixOut.LazyOpen();
728 		aFixOut.WriteLine( *pSourceLine );
729     }
730 }
731 
732 
733 /*****************************************************************************/
734 /*****************************************************************************/
735 /*****************************************************************************/
736 /*****************************************************************************/
737 /*****************************************************************************/
738 /*****************************************************************************/
739 /*****************************************************************************/
740 
741 /*****************************************************************************/
742 void Help()
743 /*****************************************************************************/
744 {
745 	fprintf( stdout, "\n" );
746 	fprintf( stdout, "gsicheck Version 1.9.0 (c)1999 - 2006 by SUN Microsystems\n" );
747 	fprintf( stdout, "=========================================================\n" );
748 	fprintf( stdout, "\n" );
749 	fprintf( stdout, "gsicheck checks the syntax of tags in GSI-Files and SDF-Files\n" );
750 	fprintf( stdout, "         checks for inconsistencies and malicious UTF8 encoding\n" );
751 	fprintf( stdout, "         checks tags in Online Help\n" );
752 	fprintf( stdout, "         checks for *new* KeyIDs and relax GID/LID length to %s\n", ByteString::CreateFromInt32(MAX_GID_LID_LEN).GetBuffer() );
753 	fprintf( stdout, "\n" );
754 	fprintf( stdout, "Syntax: gsicheck [ -c ] [-f] [ -we ] [ -wef ErrorFilename ] [ -wc ]\n" );
755 	fprintf( stdout, "                 [ -wcf CorrectFilename ] [ -s | -t ] [ -l LanguageID ]\n" );
756 	fprintf( stdout, "                 [ -r ReferenceFile ] filename\n" );
757 	fprintf( stdout, "\n" );
758 	fprintf( stdout, "-c    Add context to error message (Print the line containing the error)\n" );
759 	fprintf( stdout, "-f    try to fix errors. See also -wf -wff \n" );
760 	fprintf( stdout, "-wf   Write File containing all fixed parts\n" );
761 	fprintf( stdout, "-wff  Same as above but give own filename\n" );
762 	fprintf( stdout, "-we   Write File containing all errors\n" );
763 	fprintf( stdout, "-wef  Same as above but give own filename\n" );
764 	fprintf( stdout, "-wc   Write File containing all correct parts\n" );
765 	fprintf( stdout, "-wcf  Same as above but give own filename\n" );
766 	fprintf( stdout, "-s    Check only source language. Should be used before handing out to vendor.\n" );
767 	fprintf( stdout, "-t    Check only Translation language(s). Should be used before merging.\n" );
768 	fprintf( stdout, "-k    Allow KeyIDs to be present in strings\n" );
769     fprintf( stdout, "-e    disable encoding checks. E.g.: double questionmark \'??\' which may be the\n" );
770     fprintf( stdout, "      result of false conversions\n" );
771 	fprintf( stdout, "-l    ISO Languagecode or numerical 2 digits Identifier of the source language.\n" );
772 	fprintf( stdout, "      Default is en-US. Use \"\" (empty string) or 'none'\n" );
773 	fprintf( stdout, "      to disable source language dependent checks\n" );
774 	fprintf( stdout, "-r    Reference filename to check that source language entries\n" );
775 	fprintf( stdout, "      have not been changed\n" );
776    	fprintf( stdout, "\n" );
777 }
778 
779 /*****************************************************************************/
780 #if defined(UNX) || defined(OS2)
781 int main( int argc, char *argv[] )
782 #else
783 int _cdecl main( int argc, char *argv[] )
784 #endif
785 /*****************************************************************************/
786 {
787 
788 	sal_Bool bError = sal_False;
789 	sal_Bool bPrintContext = sal_False;
790 	sal_Bool bCheckSourceLang = sal_False;
791     sal_Bool bCheckTranslationLang = sal_False;
792     sal_Bool bWriteError = sal_False;
793 	sal_Bool bWriteCorrect = sal_False;
794     sal_Bool bWriteFixed = sal_False;
795     sal_Bool bFixTags = sal_False;
796     sal_Bool bAllowKID = sal_False;
797     sal_Bool bAllowSuspicious = sal_False;
798     String aErrorFilename;
799 	String aCorrectFilename;
800     String aFixedFilename;
801     sal_Bool bFileHasError = sal_False;
802     ByteString aSourceLang( "en-US" );     // English is default
803 	ByteString aFilename;
804     ByteString aReferenceFilename;
805     sal_Bool bReferenceFile = sal_False;
806 	for ( sal_uInt16 i = 1 ; i < argc ; i++ )
807 	{
808 		if ( *argv[ i ] == '-' )
809 		{
810 			switch (*(argv[ i ]+1))
811 			{
812 				case 'c':bPrintContext = sal_True;
813 					break;
814 				case 'w':
815 					{
816 						if ( (*(argv[ i ]+2)) == 'e' )
817                         {
818                             if ( (*(argv[ i ]+3)) == 'f' )
819                                 if ( (i+1) < argc )
820                                 {
821                                     aErrorFilename = String( argv[ i+1 ], RTL_TEXTENCODING_ASCII_US );
822         							bWriteError = sal_True;
823                                     i++;
824                                 }
825                                 else
826                                 {
827 					                fprintf( stderr, "\nERROR: Switch %s requires parameter!\n\n", argv[ i ] );
828 					                bError = sal_True;
829                                 }
830                             else
831        							bWriteError = sal_True;
832                         }
833 						else if ( (*(argv[ i ]+2)) == 'c' )
834                             if ( (*(argv[ i ]+3)) == 'f' )
835                                 if ( (i+1) < argc )
836                                 {
837                                     aCorrectFilename = String( argv[ i+1 ], RTL_TEXTENCODING_ASCII_US );
838         							bWriteCorrect = sal_True;
839                                     i++;
840                                 }
841                                 else
842                                 {
843 					                fprintf( stderr, "\nERROR: Switch %s requires parameter!\n\n", argv[ i ] );
844 					                bError = sal_True;
845                                 }
846                             else
847        							bWriteCorrect = sal_True;
848 						else if ( (*(argv[ i ]+2)) == 'f' )
849                             if ( (*(argv[ i ]+3)) == 'f' )
850                                 if ( (i+1) < argc )
851                                 {
852                                     aFixedFilename = String( argv[ i+1 ], RTL_TEXTENCODING_ASCII_US );
853         							bWriteFixed = sal_True;
854                                     bFixTags = sal_True;
855                                     i++;
856                                 }
857                                 else
858                                 {
859 					                fprintf( stderr, "\nERROR: Switch %s requires parameter!\n\n", argv[ i ] );
860 					                bError = sal_True;
861                                 }
862                             else
863                             {
864        							bWriteFixed = sal_True;
865                                 bFixTags = sal_True;
866                             }
867 						else
868 						{
869 							fprintf( stderr, "\nERROR: Unknown Switch %s!\n\n", argv[ i ] );
870 							bError = sal_True;
871 						}
872 					}
873 					break;
874 				case 's':bCheckSourceLang = sal_True;
875 					break;
876 				case 't':bCheckTranslationLang = sal_True;
877 					break;
878 				case 'l':
879                     {
880                         if ( (i+1) < argc )
881                         {
882                             aSourceLang = ByteString( argv[ i+1 ] );
883                             if ( aSourceLang.EqualsIgnoreCaseAscii( "none" ) )
884                                 aSourceLang.Erase();
885                             i++;
886                         }
887                         else
888                         {
889 					        fprintf( stderr, "\nERROR: Switch %s requires parameter!\n\n", argv[ i ] );
890 					        bError = sal_True;
891                         }
892                     }
893 					break;
894 				case 'r':
895                     {
896                         if ( (i+1) < argc )
897                         {
898                             aReferenceFilename = argv[ i+1 ];
899                             bReferenceFile = sal_True;
900                             i++;
901                         }
902                         else
903                         {
904 					        fprintf( stderr, "\nERROR: Switch %s requires parameter!\n\n", argv[ i ] );
905 					        bError = sal_True;
906                         }
907                     }
908 					break;
909 				case 'f':
910                     {
911                         bFixTags = sal_True;
912                     }
913 					break;
914 				case 'k':
915                     {
916                         bAllowKID = sal_True;
917                     }
918 					break;
919 				case 'e':
920                     {
921                         bAllowSuspicious = sal_True;
922                     }
923 					break;
924 				default:
925 					fprintf( stderr, "\nERROR: Unknown Switch %s!\n\n", argv[ i ] );
926 					bError = sal_True;
927 			}
928 		}
929 		else
930 		{
931 			if  ( !aFilename.Len())
932 				aFilename = ByteString( argv[ i ] );
933 			else
934 			{
935 				fprintf( stderr, "\nERROR: Only one filename may be specified!\n\n");
936 				bError = sal_True;
937 			}
938 		}
939 	}
940 
941 
942 	if ( !aFilename.Len() || bError )
943 	{
944 		Help();
945 		exit ( 0 );
946 	}
947 
948     if ( aSourceLang.Len() && !LanguageOK( aSourceLang ) )
949     {
950 	    fprintf( stderr, "\nERROR: The Language '%s' is invalid!\n\n", aSourceLang.GetBuffer() );
951 		Help();
952 		exit ( 1 );
953     }
954 
955 	if ( bCheckSourceLang && bCheckTranslationLang )
956     {
957 	    fprintf( stderr, "\nERROR: The Options -s and -t are mutually exclusive.\nUse only one of them.\n\n" );
958 		Help();
959 		exit ( 1 );
960     }
961 
962 
963 
964 	DirEntry aSource = DirEntry( String( aFilename, RTL_TEXTENCODING_ASCII_US ));
965 	if ( !aSource.Exists()) {
966 		fprintf( stderr, "\nERROR: GSI-File %s not found!\n\n", aFilename.GetBuffer() );
967 		exit ( 2 );
968 	}
969 
970 	SvFileStream aGSI( String( aFilename, RTL_TEXTENCODING_ASCII_US ), STREAM_STD_READ );
971 	if ( !aGSI.IsOpen()) {
972 		fprintf( stderr, "\nERROR: Could not open GSI-File %s!\n\n", aFilename.GetBuffer() );
973 		exit ( 3 );
974 	}
975 
976     SvFileStream aReferenceGSI;
977 	if ( bReferenceFile )
978     {
979         DirEntry aReferenceSource = DirEntry( String( aReferenceFilename, RTL_TEXTENCODING_ASCII_US ));
980 	    if ( !aReferenceSource.Exists()) {
981 		    fprintf( stderr, "\nERROR: GSI-File %s not found!\n\n", aFilename.GetBuffer() );
982 		    exit ( 2 );
983 	    }
984 
985 	    aReferenceGSI.Open( String( aReferenceFilename, RTL_TEXTENCODING_ASCII_US ), STREAM_STD_READ );
986 	    if ( !aReferenceGSI.IsOpen()) {
987 		    fprintf( stderr, "\nERROR: Could not open Input-File %s!\n\n", aFilename.GetBuffer() );
988 		    exit ( 3 );
989 	    }
990     }
991 
992 	LazySvFileStream aOkOut;
993 	String aBaseName = aSource.GetBase();
994 	if ( bWriteCorrect )
995 	{
996     	if ( !aCorrectFilename.Len() )
997         {
998 		    String sTmpBase( aBaseName );
999 		    sTmpBase += String( "_ok", RTL_TEXTENCODING_ASCII_US );
1000 		    aSource.SetBase( sTmpBase );
1001 		    aCorrectFilename = aSource.GetFull();
1002         }
1003 		aOkOut.SetOpenParams( aCorrectFilename , STREAM_STD_WRITE | STREAM_TRUNC );
1004 	}
1005 
1006 	LazySvFileStream aErrOut;
1007 	if ( bWriteError )
1008 	{
1009     	if ( !aErrorFilename.Len() )
1010         {
1011 		    String sTmpBase( aBaseName );
1012 		    sTmpBase += String( "_err", RTL_TEXTENCODING_ASCII_US );
1013 		    aSource.SetBase( sTmpBase );
1014 		    aErrorFilename = aSource.GetFull();
1015         }
1016 		aErrOut.SetOpenParams( aErrorFilename , STREAM_STD_WRITE | STREAM_TRUNC );
1017 	}
1018 
1019 	LazySvFileStream aFixOut;
1020 	if ( bWriteFixed )
1021 	{
1022     	if ( !aFixedFilename.Len() )
1023         {
1024 		    String sTmpBase( aBaseName );
1025 		    sTmpBase += String( "_fix", RTL_TEXTENCODING_ASCII_US );
1026 		    aSource.SetBase( sTmpBase );
1027 		    aFixedFilename = aSource.GetFull();
1028         }
1029 		aFixOut.SetOpenParams( aFixedFilename , STREAM_STD_WRITE | STREAM_TRUNC );
1030 	}
1031 
1032 
1033     ByteString sReferenceLine;
1034 	GSILine* pReferenceLine = NULL;
1035 	ByteString aOldReferenceId("No Valid ID");   // just set to something which can never be an ID
1036 	sal_uLong nReferenceLine = 0;
1037 
1038 	ByteString sGSILine;
1039 	GSILine* pGSILine = NULL;
1040 	ByteString aOldId("No Valid ID");   // just set to something which can never be an ID
1041 	GSIBlock *pBlock = NULL;
1042 	sal_uLong nLine = 0;
1043 
1044 	while ( !aGSI.IsEof() )
1045     {
1046 		aGSI.ReadLine( sGSILine );
1047 		nLine++;
1048         pGSILine = new GSILine( sGSILine, nLine );
1049         sal_Bool bDelete = sal_True;
1050 
1051 
1052 		if ( pGSILine->Len() )
1053         {
1054             if ( FORMAT_UNKNOWN == pGSILine->GetLineFormat() )
1055 	        {
1056 		        PrintError( "Format of line is unknown. Ignoring!", "Line format", pGSILine->Copy( 0,40 ), bPrintContext, pGSILine->GetLineNumber() );
1057 		        pGSILine->NotOK();
1058 				if ( bWriteError )
1059                 {
1060 					bFileHasError = sal_True;
1061                     aErrOut.LazyOpen();
1062                     aErrOut.WriteLine( *pGSILine );
1063                 }
1064 	        }
1065             else if ( pGSILine->GetLineType().EqualsIgnoreCaseAscii("res-comment") )
1066             {   // ignore comment lines, but write them to Correct Items File
1067 			    if ( bWriteCorrect )
1068                 {
1069                     aOkOut.LazyOpen();
1070                		aOkOut.WriteLine( *pGSILine );
1071                 }
1072             }
1073             else
1074             {
1075                 ByteString aId = pGSILine->GetUniqId();
1076 			    if ( aId != aOldId )
1077                 {
1078 				    if ( pBlock )
1079 				    {
1080 					    bFileHasError |= pBlock->CheckSyntax( nLine, aSourceLang.Len() != 0, bFixTags );
1081 
1082 					    if ( bWriteError )
1083 						    pBlock->WriteError( aErrOut, aSourceLang.Len() != 0 );
1084 					    if ( bWriteCorrect )
1085 						    pBlock->WriteCorrect( aOkOut, aSourceLang.Len() != 0 );
1086 					    if ( bWriteFixed )
1087 						    pBlock->WriteFixed( aFixOut, aSourceLang.Len() != 0 );
1088 
1089 					    delete pBlock;
1090 				    }
1091 				    pBlock = new GSIBlock( bPrintContext, bCheckSourceLang, bCheckTranslationLang, bReferenceFile, bAllowKID, bAllowSuspicious );
1092 
1093 				    aOldId = aId;
1094 
1095 
1096                     // find corresponding line in reference file
1097                     if ( bReferenceFile )
1098                     {
1099                         sal_Bool bContinueSearching = sal_True;
1100                         while ( ( !aReferenceGSI.IsEof() || pReferenceLine ) && bContinueSearching )
1101                         {
1102                             if ( !pReferenceLine )
1103                             {
1104 		                        aReferenceGSI.ReadLine( sReferenceLine );
1105 		                        nReferenceLine++;
1106                                 pReferenceLine = new GSILine( sReferenceLine, nReferenceLine );
1107                             }
1108                             if ( pReferenceLine->GetLineFormat() != FORMAT_UNKNOWN )
1109                             {
1110                                 if ( pReferenceLine->GetUniqId() == aId && pReferenceLine->GetLanguageId().Equals( aSourceLang ) )
1111                                 {
1112                                     pBlock->SetReferenceLine( pReferenceLine );
1113                                     pReferenceLine = NULL;
1114                                 }
1115                                 else if ( pReferenceLine->GetUniqId() > aId )
1116                                 {
1117 //                                    if ( pGSILine->GetLanguageId() == aSourceLang )
1118 //                    		            PrintError( "No reference line found. Entry is new in source file", "File format", "", bPrintContext, pGSILine->GetLineNumber(), aId );
1119                                     bContinueSearching = sal_False;
1120                                 }
1121                                 else
1122                                 {
1123                                     if ( pReferenceLine->GetUniqId() < aId  && pReferenceLine->GetLanguageId().Equals( aSourceLang ) )
1124                 		                PrintError( "No Entry in source file found. Entry has been removed from source file", "File format", "", bPrintContext, pGSILine->GetLineNumber(), pReferenceLine->GetUniqId() );
1125                                     delete pReferenceLine;
1126                                     pReferenceLine = NULL;
1127                                 }
1128                             }
1129                             else
1130                             {
1131                                 delete pReferenceLine;
1132                                 pReferenceLine = NULL;
1133                             }
1134 
1135                         }
1136                     }
1137 
1138     		    }
1139 
1140 			    pBlock->InsertLine( pGSILine, aSourceLang );
1141                 bDelete = sal_False;
1142             }
1143 		}
1144         if ( bDelete )
1145             delete pGSILine;
1146 
1147 	}
1148 	if ( pBlock )
1149 	{
1150 		bFileHasError |= pBlock->CheckSyntax( nLine, aSourceLang.Len() != 0, bFixTags );
1151 
1152 		if ( bWriteError )
1153 			pBlock->WriteError( aErrOut, aSourceLang.Len() != 0 );
1154 		if ( bWriteCorrect )
1155 			pBlock->WriteCorrect( aOkOut, aSourceLang.Len() != 0 );
1156 		if ( bWriteFixed )
1157 			pBlock->WriteFixed( aFixOut, aSourceLang.Len() != 0 );
1158 
1159 		delete pBlock;
1160 	}
1161 	aGSI.Close();
1162 
1163 	if ( bWriteError )
1164 		aErrOut.Close();
1165 	if ( bWriteCorrect )
1166 		aOkOut.Close();
1167 	if ( bWriteFixed )
1168 		aFixOut.Close();
1169 
1170     if ( bFileHasError )
1171         return 55;
1172     else
1173 	    return 0;
1174 }
1175