xref: /trunk/main/l10ntools/source/tagtest.cxx (revision 1ecadb572e7010ff3b3382ad9bf179dbc6efadbb)
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_l10ntools.hxx"
30 #include <tools/string.hxx>
31 #include "tagtest.hxx"
32 
33 #if OSL_DEBUG_LEVEL > 1
34 #include <stdio.h>
35 #endif
36 
37 #include "gsicheck.hxx"
38 
// Bit-flag helpers.
// Every macro argument is parenthesised so that compound expressions such as
// RESET_FLAG( f, A | B ) expand with the intended precedence.
#define HAS_FLAG( nFlags, nFlag )       ( ( (nFlags) & (nFlag) ) != 0 )
#define SET_FLAG( nFlags, nFlag )       ( (nFlags) |= (nFlag) )
#define RESET_FLAG( nFlags, nFlag )     ( (nFlags) &= ~(nFlag) )    // ~ = bitwise NOT
42 
43 
44 
45 TokenInfo::TokenInfo( TokenId pnId, sal_uInt16 nP, String paStr, ParserMessageList &rErrorList )
46 : bClosed(sal_False)
47 , bCloseTag(sal_False)
48 , bIsBroken(sal_False)
49 , bHasBeenFixed(sal_False)
50 , bDone(sal_False)
51 , aTokenString( paStr )
52 , nId( pnId )
53 , nPos(nP)
54 {
55     if ( nId == TAG_COMMONSTART || nId == TAG_COMMONEND )
56         SplitTag( rErrorList );
57 }
58 
// States of the tag parser implemented in TokenInfo::SplitTag().
enum tagcheck
{
    TC_START,                   // nothing read yet
    TC_HAS_TAG_NAME,            // tag name read, or ready for the next property
    TC_HAS_PROP_NAME_EQ,        // property name followed by '='
    TC_HAS_PROP_NAME_EQ_SP,     // property name, '=' and blanks
    TC_HAS_PROP_NAME_SP,        // property name followed by blanks
    TC_INSIDE_STRING,           // inside a quoted property value
    TC_PROP_FINISHED,           // closing quote of a property value read
    TC_CLOSED,                  // '/' read at the end of a start tag
    TC_CLOSED_SPACE,            // blanks after that '/'
    TC_CLOSETAG,                // leading '/' read, i.e. this is an end tag
    TC_CLOSETAG_HAS_TAG_NAME,   // name of an end tag read
    TC_FINISHED,                // closing '>' read
    TC_ERROR                    // unexpected delimiter for the current state
};
60 
61 /*
62                                                       \<  link  href  =  \"text\"  name  =  \"C\"  \>
63 START               ' ' ->  HAS_TAG_NAME
64 START               '/' ->  CLOSED
65 START               '/' ->  CLOSETAG    - no Portion (starting with /)
66 START               '>' ->  FINISHED
67 HAS_TAG_NAME        '=' ->  HAS_PROP_NAME_EQ
68 HAS_TAG_NAME        ' ' ->  HAS_PROP_NAME_SP
69 HAS_TAG_NAME        '/' ->  CLOSED
70 HAS_TAG_NAME        '>' ->  FINISHED
71 HAS_PROP_NAME_SP    '=' ->  HAS_PROP_NAME_EQ
72 HAS_PROP_NAME_EQ    ' ' ->  HAS_PROP_NAME_EQ_SP
73 HAS_PROP_NAME_EQ    '"' ->  INSIDE_STRING
74 HAS_PROP_NAME_EQ_SP '"' ->  INSIDE_STRING
75 INSIDE_STRING       ' ' ->  INSIDE_STRING
76 INSIDE_STRING       '=' ->  INSIDE_STRING
77 INSIDE_STRING       '>' ->  INSIDE_STRING
78 INSIDE_STRING       '"' ->  PROP_FINISHED
79 PROP_FINISHED       ' ' ->  HAS_TAG_NAME
80 PROP_FINISHED       '/' ->  CLOSED
81 PROP_FINISHED       '>' ->  FINISHED
82 CLOSED              ' ' ->  CLOSED_SPACE
83 CLOSED              '>' ->  FINISHED
84 CLOSED_SPACE        '>' ->  FINISHED
85 
86 CLOSETAG            ' ' ->  CLOSETAG_HAS_TAG_NAME
87 CLOSETAG            '>' ->  FINISHED
88 CLOSETAG_HAS_TAG_NAME  '>' ->  FINISHED
89 
90 */
91 void TokenInfo::SplitTag( ParserMessageList &rErrorList )
92 {
93     sal_uInt16 nLastPos = 2;    // skip initial  \<
94     sal_uInt16 nCheckPos = nLastPos;
95     String aDelims( String::CreateFromAscii( " \\=>/" ) );
96     String aPortion;
97     String aValue;      // store the value of a property
98     ByteString aName;   // store the name of a property/tag
99     sal_Bool bCheckName = sal_False;
100     sal_Bool bCheckEmpty = sal_False;
101     sal_Unicode cDelim;
102     tagcheck aState = TC_START;
103 
104     // skip blanks
105     while ( nLastPos < aTokenString.Len() && aTokenString.GetChar( nLastPos ) == ' ')
106         nLastPos++;
107 
108     nCheckPos = aTokenString.SearchChar( aDelims.GetBuffer(), nLastPos );
109     while ( nCheckPos != STRING_NOTFOUND && !( aState == TC_FINISHED || aState == TC_ERROR ) )
110     {
111         aPortion = aTokenString.Copy( nLastPos, nCheckPos-nLastPos );
112 
113         if ( aTokenString.GetChar( nCheckPos ) == '\\' )
114             nCheckPos++;
115 
116         cDelim = aTokenString.GetChar( nCheckPos );
117         nCheckPos++;
118 
119         switch ( aState )
120         {
121 //            START           ' ' ->  HAS_TAG_NAME
122 //            START           '/' ->  CLOSED
123 //            START           '>' ->  FINISHED
124             case TC_START:
125                 aTagName = aPortion;
126                 switch ( cDelim )
127                 {
128                     case ' ':  aState = TC_HAS_TAG_NAME;
129                                bCheckName = sal_True;
130                                break;
131                     case '/':
132                         {
133                             if ( aPortion.Len() == 0 )
134                             {
135                                 aState = TC_CLOSETAG;
136                             }
137                             else
138                             {
139                                 aState = TC_CLOSED;
140                                 bCheckName = sal_True;
141                             }
142                         }
143                         break;
144                     case '>':  aState = TC_FINISHED;
145                                bCheckName = sal_True;
146                                break;
147                     default:   aState = TC_ERROR;
148                 }
149                 break;
150 
151 //            HAS_TAG_NAME    '=' ->  HAS_PROP_NAME_EQ
152 //            HAS_TAG_NAME    ' ' ->  HAS_PROP_NAME_SP
153 //            HAS_TAG_NAME    '/' ->  CLOSED
154 //            HAS_TAG_NAME    '>' ->  FINISHED
155             case TC_HAS_TAG_NAME:
156                 switch ( cDelim )
157                 {
158                     case '=':  aState = TC_HAS_PROP_NAME_EQ;
159                                bCheckName = sal_True;
160                                break;
161                     case ' ':  aState = TC_HAS_PROP_NAME_SP;
162                                bCheckName = sal_True;
163                                break;
164                     case '/':  aState = TC_CLOSED;
165                                bCheckEmpty = sal_True;
166                                break;
167                     case '>':  aState = TC_FINISHED;
168                                bCheckEmpty = sal_True;
169                                break;
170                     default:   aState = TC_ERROR;
171                 }
172                 break;
173 
174 //            HAS_PROP_NAME_SP    '=' ->  HAS_PROP_NAME_EQ
175             case TC_HAS_PROP_NAME_SP:
176                 switch ( cDelim )
177                 {
178                     case '=':  aState = TC_HAS_PROP_NAME_EQ;
179                                bCheckEmpty = sal_True;
180                                break;
181                     default:   aState = TC_ERROR;
182                 }
183                 break;
184 
185 //            HAS_PROP_NAME_EQ    ' ' ->  HAS_PROP_NAME_EQ_SP
186 //            HAS_PROP_NAME_EQ    '"' ->  INSIDE_STRING
187             case TC_HAS_PROP_NAME_EQ:
188                 switch ( cDelim )
189                 {
190                     case ' ':  aState = TC_HAS_PROP_NAME_EQ_SP;
191                                bCheckEmpty = sal_True;
192                                break;
193                     case '\"': aState = TC_INSIDE_STRING;
194                                bCheckEmpty = sal_True;
195                                aValue.Erase();
196                                break;
197                     default:   aState = TC_ERROR;
198                 }
199                 break;
200 
201 //            HAS_PROP_NAME_EQ_SP '"' ->  INSIDE_STRING
202             case TC_HAS_PROP_NAME_EQ_SP:
203                 switch ( cDelim )
204                 {
205                     case '\"': aState = TC_INSIDE_STRING;
206                                bCheckEmpty = sal_True;
207                                aValue.Erase();
208                                break;
209                     default:   aState = TC_ERROR;
210                 }
211                 break;
212 
213 //            INSIDE_STRING    *  ->  INSIDE_STRING
214 //            INSIDE_STRING   '"' ->  PROP_FINISHED
215             case TC_INSIDE_STRING:
216                 switch ( cDelim )
217                 {
218                     case '\"':
219                         {
220                             aState = TC_PROP_FINISHED;
221                             aValue += aPortion;
222                             if ( aProperties.find( aName ) == aProperties.end() )
223                             {
224                                 if ( !IsPropertyValueValid( aName, aValue ) )
225                                 {
226                                     rErrorList.AddError( 25, ByteString("Property '").Append(aName).Append("' has invalid value '").Append(ByteString( aValue, RTL_TEXTENCODING_UTF8 )).Append("' "), *this );
227                                     bIsBroken = sal_True;
228                                 }
229                                 aProperties[ aName ] = aValue;
230                             }
231                             else
232                             {
233                                 rErrorList.AddError( 25, ByteString("Property '").Append(aName).Append("' defined twice "), *this );
234                                 bIsBroken = sal_True;
235                             }
236                         }
237                                break;
238                     default:
239                         {
240                             aState = TC_INSIDE_STRING;
241                             aValue += aPortion;
242                             aValue += cDelim;
243                         }
244                 }
245                 break;
246 
247 //            PROP_FINISHED   ' ' ->  HAS_TAG_NAME
248 //            PROP_FINISHED   '/' ->  CLOSED
249 //            PROP_FINISHED   '>' ->  FINISHED
250             case TC_PROP_FINISHED:
251                 switch ( cDelim )
252                 {
253                     case ' ': aState = TC_HAS_TAG_NAME;
254                                bCheckEmpty = sal_True;
255                                break;
256                     case '/': aState = TC_CLOSED;
257                                bCheckEmpty = sal_True;
258                                break;
259                     case '>': aState = TC_FINISHED;
260                                bCheckEmpty = sal_True;
261                                break;
262                     default:   aState = TC_ERROR;
263                 }
264                 break;
265 
266 //            CLOSED          ' ' ->  CLOSED_SPACE
267 //            CLOSED          '>' ->  FINISHED
268             case TC_CLOSED:
269                 switch ( cDelim )
270                 {
271                     case ' ': aState = TC_CLOSED_SPACE;
272                                bCheckEmpty = sal_True;
273                                bClosed = sal_True;
274                                break;
275                     case '>': aState = TC_FINISHED;
276                                bCheckEmpty = sal_True;
277                                break;
278                     default:   aState = TC_ERROR;
279                 }
280                 break;
281 
282 //            CLOSED_SPACE    '>' ->  FINISHED
283             case TC_CLOSED_SPACE:
284                 switch ( cDelim )
285                 {
286                     case '>': aState = TC_FINISHED;
287                                bCheckEmpty = sal_True;
288                                break;
289                     default:   aState = TC_ERROR;
290                 }
291                 break;
292 
293 // CLOSETAG            ' ' ->  CLOSETAG_HAS_TAG_NAME
294 // CLOSETAG            '>' ->  FINISHED
295             case TC_CLOSETAG:
296                 bCloseTag = sal_True;
297                 switch ( cDelim )
298                 {
299                     case ' ': aState = TC_CLOSETAG_HAS_TAG_NAME;
300                                aTagName = aPortion;
301                                bCheckName = sal_True;
302                                break;
303                     case '>': aState = TC_FINISHED;
304                                aTagName = aPortion;
305                                bCheckName = sal_True;
306                                break;
307                     default:   aState = TC_ERROR;
308                 }
309                 break;
310 
311 // CLOSETAG_HAS_TAG_NAME       '>' ->  FINISHED
312             case TC_CLOSETAG_HAS_TAG_NAME:
313                 switch ( cDelim )
314                 {
315                     case '>': aState = TC_FINISHED;
316                                bCheckEmpty = sal_True;
317                                break;
318                     default:   aState = TC_ERROR;
319                 }
320                 break;
321 
322 
323             default: rErrorList.AddError( 99, "Internal error Parsing Tag ", *this );
324                      bIsBroken = sal_True;
325 
326         }
327 
328         if ( bCheckName )
329         {
330             if ( aPortion.Len() == 0 )
331             {
332                 rErrorList.AddError( 25, "Tag/Property name missing ", *this );
333                 bIsBroken = sal_True;
334             }
335             else
336             {
337                 aName = ByteString( aPortion, RTL_TEXTENCODING_UTF8 );
338                 // "a-zA-Z_-.0-9"
339                 xub_StrLen nCount;
340                 sal_Bool bBroken = sal_False;
341                 const sal_Char* aBuf = aName.GetBuffer();
342                 for ( nCount = 0 ; !bBroken && nCount < aName.Len() ; nCount++ )
343                 {
344                     bBroken = ! (   ( aBuf[nCount] >= 'a' && aBuf[nCount] <= 'z' )
345                                 ||( aBuf[nCount] >= 'A' && aBuf[nCount] <= 'Z' )
346                                 ||( aBuf[nCount] >= '0' && aBuf[nCount] <= '9' )
347                                 ||( aBuf[nCount] == '_' )
348                                 ||( aBuf[nCount] == '-' )
349                                 ||( aBuf[nCount] == '.' )
350                                 );
351                 }
352 
353                 if ( bBroken )
354                 {
355                     rErrorList.AddError( 25, "Found illegal character in Tag/Property name ", *this );
356                     bIsBroken = sal_True;
357                 }
358             }
359 
360             bCheckName = sal_False;
361         }
362 
363         if ( bCheckEmpty )
364         {
365             if ( aPortion.Len() )
366             {
367                 rErrorList.AddError( 25, ByteString("Found displaced characters '").Append(ByteString( aPortion, RTL_TEXTENCODING_UTF8 )).Append("' in Tag "), *this );
368                 bIsBroken = sal_True;
369             }
370             bCheckEmpty = sal_False;
371         }
372 
373 
374         nLastPos = nCheckPos;
375 
376         // skip further blanks
377         if ( cDelim == ' ' && aState != TC_INSIDE_STRING )
378             while ( nLastPos < aTokenString.Len() && aTokenString.GetChar( nLastPos ) == ' ')
379                 nLastPos++;
380 
381         nCheckPos = aTokenString.SearchChar( aDelims.GetBuffer(), nLastPos );
382     }
383     if ( aState != TC_FINISHED )
384     {
385         rErrorList.AddError( 25, "Parsing error in Tag ", *this );
386         bIsBroken = sal_True;
387     }
388 }
389 
390 sal_Bool TokenInfo::IsPropertyRelevant( const ByteString &aName, const String &aValue ) const
391 {
392     if ( aTagName.EqualsAscii( "alt" ) && aName.Equals( "xml-lang" ) )
393         return sal_False;
394     if ( aTagName.EqualsAscii( "ahelp" ) && aName.Equals( "visibility" ) && aValue.EqualsAscii("visible") )
395         return sal_False;
396     if ( aTagName.EqualsAscii( "image" ) && (aName.Equals( "width" ) || aName.Equals( "height" )) )
397         return sal_False;
398 
399     return sal_True;
400 }
401 
402 sal_Bool TokenInfo::IsPropertyValueValid( const ByteString &aName, const String &aValue ) const
403 {
404 /*  removed due to i56740
405     if ( aTagName.EqualsAscii( "switchinline" ) && aName.Equals( "select" ) )
406     {
407         return aValue.EqualsAscii("sys") ||
408                aValue.EqualsAscii("appl") ||
409                aValue.EqualsAscii("distrib");
410     } */
411     if ( aTagName.EqualsAscii( "caseinline" ) && aName.Equals( "select" ) )
412     {
413         return /*!aValue.EqualsAscii("OS2") &&  removed due to i56740 */
414                !aValue.EqualsAscii("");
415     }
416 
417     // we don't know any better so we assume it to be OK
418     return sal_True;
419 }
420 
421 sal_Bool TokenInfo::IsPropertyInvariant( const ByteString &aName, const String &aValue ) const
422 {
423     if ( aTagName.EqualsAscii( "link" ) && aName.Equals( "name" ) )
424         return sal_False;
425     if ( aTagName.EqualsAscii( "link" ) && aName.Equals( "href" ) )
426     {   // check for external reference
427         if (  aValue.Copy( 0, 5 ).EqualsIgnoreCaseAscii( "http:" )
428            || aValue.Copy( 0, 6 ).EqualsIgnoreCaseAscii( "https:" )
429            || aValue.Copy( 0, 4 ).EqualsIgnoreCaseAscii( "ftp:" ) )
430             return sal_False;
431         else
432             return sal_True;
433     }
434     return sal_True;
435 }
436 
437 sal_Bool TokenInfo::IsPropertyFixable( const ByteString &aName ) const
438 {
439     // name everything that is allowed to be fixed automatically here
440     if ( (aTagName.EqualsAscii( "ahelp" ) && aName.Equals( "hid" ))
441       || (aTagName.EqualsAscii( "link" ) && aName.Equals( "href" ))
442       || (aTagName.EqualsAscii( "alt" ) && aName.Equals( "id" ))
443       || (aTagName.EqualsAscii( "variable" ) && aName.Equals( "id" ))
444       || (aTagName.EqualsAscii( "image" ) && aName.Equals( "src" ))
445       || (aTagName.EqualsAscii( "image" ) && aName.Equals( "id" ) ))
446         return sal_True;
447     return sal_False;
448 }
449 
450 sal_Bool TokenInfo::MatchesTranslation( TokenInfo& rInfo, sal_Bool bGenErrors, ParserMessageList &rErrorList, sal_Bool bFixTags ) const
451 {
452     // check if tags are equal
453     // check if all existing properties are in the translation as well and
454     // wether they have a matching content (the same in most cases)
455 
456     if ( nId != rInfo.nId )
457         return sal_False;
458 
459     if ( !aTagName.Equals( rInfo.aTagName ) )
460         return sal_False;
461 
462     // If one of the tags has formating errors already it does make no sense to check here, so return right away
463     if ( bGenErrors && ( bIsBroken || rInfo.bIsBroken ) )
464         return sal_True;
465 
466     StringHashMap::const_iterator iProp;
467     for( iProp = aProperties.begin() ; iProp != aProperties.end(); ++iProp )
468     {
469         if ( rInfo.aProperties.find( iProp->first ) != rInfo.aProperties.end() )
470         {
471             if ( IsPropertyRelevant( iProp->first, iProp->second ) || IsPropertyRelevant( iProp->first, rInfo.aProperties.find( iProp->first )->second ) )
472             {
473                 if ( IsPropertyInvariant( iProp->first, iProp->second ) )
474                 {
475                     if ( !rInfo.aProperties.find( iProp->first )->second.Equals( iProp->second ) )
476                     {
477                         if ( bGenErrors )
478                         {
479                             if ( bFixTags && IsPropertyFixable( iProp->first ) )
480                             {
481                                 rInfo.aProperties.find( iProp->first )->second = iProp->second;
482                                 rInfo.SetHasBeenFixed();
483                                 rErrorList.AddWarning( 25, ByteString("Property '").Append(iProp->first).Append("': FIXED different value in Translation "), *this );
484                             }
485                             else
486                                 rErrorList.AddError( 25, ByteString("Property '").Append(iProp->first).Append("': value different in Translation "), *this );
487                         }
488                         else return sal_False;
489                     }
490                 }
491             }
492         }
493         else
494         {
495             if ( IsPropertyRelevant( iProp->first, iProp->second ) )
496             {
497                 if ( bGenErrors )
498                     rErrorList.AddError( 25, ByteString("Property '").Append(iProp->first).Append("' missing in Translation "), *this );
499                 else return sal_False;
500             }
501         }
502     }
503     for( iProp = rInfo.aProperties.begin() ; iProp != rInfo.aProperties.end(); ++iProp )
504     {
505         if ( aProperties.find( iProp->first ) == aProperties.end() )
506         {
507             if ( IsPropertyRelevant( iProp->first, iProp->second ) )
508             {
509                 if ( bGenErrors )
510                     rErrorList.AddError( 25, ByteString("Extra Property '").Append(iProp->first).Append("' in Translation "), rInfo );
511                 else return sal_False;
512             }
513         }
514     }
515 
516     // if we reach here eather
517     //   the tags match completely or
518     //   the tags match but not the properties and we generated errors for that
519     return sal_True;
520 }
521 
522 String TokenInfo::GetTagName() const
523 {
524     return aTagName;
525 }
526 
527 String TokenInfo::MakeTag() const
528 {
529     String aRet;
530     aRet.AppendAscii("\\<");
531     if ( bCloseTag )
532         aRet.AppendAscii("/");
533     aRet.Append( GetTagName() );
534     StringHashMap::const_iterator iProp;
535 
536     for( iProp = aProperties.begin() ; iProp != aProperties.end(); ++iProp )
537     {
538         aRet.AppendAscii(" ");
539         aRet.Append( String( iProp->first, RTL_TEXTENCODING_UTF8 ) );
540         aRet.AppendAscii("=\\\"");
541         aRet.Append( iProp->second );
542         aRet.AppendAscii("\\\"");
543     }
544     if ( bClosed )
545         aRet.AppendAscii("/");
546     aRet.AppendAscii("\\>");
547     return aRet;
548 }
549 
550 
551 void ParserMessageList::AddError( sal_uInt16 nErrorNr, ByteString aErrorText, const TokenInfo &rTag )
552 {
553     Insert( new ParserError( nErrorNr, aErrorText, rTag ), LIST_APPEND );
554 }
555 
556 void ParserMessageList::AddWarning( sal_uInt16 nErrorNr, ByteString aErrorText, const TokenInfo &rTag )
557 {
558     Insert( new ParserWarning( nErrorNr, aErrorText, rTag ), LIST_APPEND );
559 }
560 
561 sal_Bool ParserMessageList::HasErrors()
562 {
563     sal_uInt16 i;
564     for ( i=0 ; i < Count() ; i++ )
565         if ( GetObject( i )->IsError() )
566             return sal_True;
567     return sal_False;
568 }
569 
570 struct Tag
571 {
572     String GetName() const { return String::CreateFromAscii( pName ); };
573     const char* pName;
574     TokenId nTag;
575 };
576 
577 
578 static const Tag aKnownTags[] =
579 {
580 /*  commenting oldstyle tags
581 //  { "<#GROUP_FORMAT>", TAG_GROUP_FORMAT },
582     { "<#BOLD>", TAG_BOLDON },
583     { "<#/BOLD>", TAG_BOLDOFF },
584     { "<#ITALIC>", TAG_ITALICON },
585     { "<#/ITALIC>", TAG_ITALICOFF },
586     { "<#UNDER>", TAG_UNDERLINEON },
587     { "<#/UNDER>", TAG_UNDERLINEOFF },
588 
589 //  { "<#GROUP_NOTALLOWED>", TAG_GROUP_NOTALLOWED },
590     { "<#HELPID>", TAG_HELPID },
591     { "<#MODIFY>", TAG_MODIFY },
592     { "<#REFNR>", TAG_REFNR },
593 
594 //  { "<#GROUP_STRUCTURE>", TAG_GROUP_STRUCTURE },
595     { "<#NAME>", TAG_NAME },
596     { "<#HREF>", TAG_HREF },
597     { "<#AVIS>", TAG_AVIS },
598     { "<#AHID>", TAG_AHID },
599     { "<#AEND>", TAG_AEND },
600 
601     { "<#TITEL>", TAG_TITEL },
602     { "<#KEY>", TAG_KEY },
603     { "<#INDEX>", TAG_INDEX },
604 
605     { "<#REFSTART>", TAG_REFSTART },
606 
607     { "<#GRAPHIC>", TAG_GRAPHIC },
608     { "<#NEXTVERSION>", TAG_NEXTVERSION },
609 
610     //  { "<#GROUP_SYSSWITCH>", TAG_GROUP_SYSSWITCH },
611     { "<#WIN>", TAG_WIN },
612     { "<#UNIX>", TAG_UNIX },
613     { "<#MAC>", TAG_MAC },
614     { "<#OS2>", TAG_OS2 },
615 
616 //  { "<#GROUP_PROGSWITCH>", TAG_GROUP_PROGSWITCH },
617     { "<#WRITER>", TAG_WRITER },
618     { "<#CALC>", TAG_CALC },
619     { "<#DRAW>", TAG_DRAW },
620     { "<#IMPRESS>", TAG_IMPRESS },
621     { "<#SCHEDULE>", TAG_SCHEDULE },
622     { "<#IMAGE>", TAG_IMAGE },
623     { "<#MATH>", TAG_MATH },
624     { "<#CHART>", TAG_CHART },
625     { "<#OFFICE>", TAG_OFFICE },
626   */
627 //  { "<#TAG_GROUP_META>", TAG_GROUP_META },
628     { "$[officefullname]", TAG_OFFICEFULLNAME },
629     { "$[officename]", TAG_OFFICENAME },
630     { "$[officepath]", TAG_OFFICEPATH },
631     { "$[officeversion]", TAG_OFFICEVERSION },
632     { "$[portalname]", TAG_PORTALNAME },
633     { "$[portalfullname]", TAG_PORTALFULLNAME },
634     { "$[portalpath]", TAG_PORTALPATH },
635     { "$[portalversion]", TAG_PORTALVERSION },
636     { "$[portalshortname]", TAG_PORTALSHORTNAME },
637 /*  commenting oldstyle tags
638 //  { "<#TAG_GROUP_SINGLE>", TAG_GROUP_SINGLE },
639     { "<#REFINSERT>", TAG_REFINSERT },
640 
641 //  { "<#GROUP_MULTI>", TAG_GROUP_MULTI },
642     { "<#END>", TAG_END },
643     { "<#ELSE>", TAG_ELSE },
644     { "<#VERSIONEND>", TAG_VERSIONEND },
645     { "<#ENDGRAPHIC>", TAG_ENDGRAPHIC },*/
646     { "<Common Tag>", TAG_COMMONSTART },
647     { "</Common Tag>", TAG_COMMONEND },
648 
649     { "<no more tags>", TAG_NOMORETAGS },
650     { "", TAG_UNKNOWN_TAG },
651 };
652 
653 
654 SimpleParser::SimpleParser()
655 : nPos( 0 )
656 , aNextTag( TAG_NOMORETAGS, TOK_INVALIDPOS )
657 {
658 }
659 
660 void SimpleParser::Parse( String PaSource )
661 {
662     aSource = PaSource;
663     nPos = 0;
664     aLastToken.Erase();
665     aNextTag = TokenInfo( TAG_NOMORETAGS, TOK_INVALIDPOS );
666     aTokenList.Clear();
667 };
668 
669 TokenInfo SimpleParser::GetNextToken( ParserMessageList &rErrorList )
670 {
671     TokenInfo aResult;
672     sal_uInt16 nTokenStartPos = 0;
673     if ( aNextTag.nId != TAG_NOMORETAGS )
674     {
675         aResult = aNextTag;
676         aNextTag = TokenInfo( TAG_NOMORETAGS, TOK_INVALIDPOS );
677     }
678     else
679     {
680         aLastToken = GetNextTokenString( rErrorList, nTokenStartPos );
681         if ( aLastToken.Len() == 0 )
682             return TokenInfo( TAG_NOMORETAGS, TOK_INVALIDPOS );
683 
684         // do we have a \< ... \> style tag?
685         if ( aLastToken.Copy(0,2).EqualsAscii( "\\<" ) )
686         {
687             // check for paired \" \"
688             bool bEven = true;
689             sal_uInt16 nQuotePos = 0;
690             sal_uInt16 nQuotedQuotesPos = aLastToken.SearchAscii( "\\\"" );
691             sal_uInt16 nQuotedBackPos = aLastToken.SearchAscii( "\\\\" );    // this is only to kick out quoted backslashes
692             while ( nQuotedQuotesPos != STRING_NOTFOUND )
693             {
694                 if ( nQuotedBackPos <= nQuotedQuotesPos )
695                     nQuotePos = nQuotedBackPos+2;
696                 else
697                 {
698                     nQuotePos = nQuotedQuotesPos+2;
699                     bEven = !bEven;
700                 }
701                 nQuotedQuotesPos = aLastToken.SearchAscii( "\\\"", nQuotePos );
702                 nQuotedBackPos = aLastToken.SearchAscii( "\\\\", nQuotePos );    // this is only to kick out quoted backslashes
703             }
704             if ( !bEven )
705             {
706                 rErrorList.AddError( 24, "Missing quotes ( \\\" ) in Tag", TokenInfo( TAG_UNKNOWN_TAG, nTokenStartPos, aLastToken ) );
707             }
708 
709             // check if we have an end-tag or a start-tag
710             sal_uInt16 nNonBlankStartPos,nNonBlankEndPos;
711             nNonBlankStartPos = 2;
712             while ( aLastToken.GetChar(nNonBlankStartPos) == ' ' )
713                 nNonBlankStartPos++;
714             if ( aLastToken.GetChar(nNonBlankStartPos) == '/' )
715                 aResult = TokenInfo( TAG_COMMONEND, nTokenStartPos, aLastToken, rErrorList );
716             else
717             {
718                 aResult = TokenInfo( TAG_COMMONSTART, nTokenStartPos, aLastToken, rErrorList );
719                 nNonBlankEndPos = aLastToken.Len() -3;
720                 while ( aLastToken.GetChar(nNonBlankEndPos) == ' ' )
721                     nNonBlankEndPos--;
722                 if ( aLastToken.GetChar( nNonBlankEndPos ) == '/' )
723                     aNextTag = TokenInfo( TAG_COMMONEND, nTokenStartPos, String::CreateFromAscii("\\</").Append(aResult.GetTagName()).AppendAscii("\\>"), rErrorList );
724             }
725         }
726         else
727         {
728             sal_uInt16 i = 0;
729             while ( aKnownTags[i].nTag != TAG_UNKNOWN_TAG &&
730                 aLastToken != aKnownTags[i].GetName() )
731                 i++;
732             aResult = TokenInfo( aKnownTags[i].nTag, nTokenStartPos );
733         }
734     }
735 
736     if ( aResult.nId == TAG_UNKNOWN_TAG )
737         aResult = TokenInfo( TAG_UNKNOWN_TAG, nTokenStartPos, aLastToken );
738     aTokenList.Insert( aResult, LIST_APPEND );
739     return aResult;
740 }
741 
742 String SimpleParser::GetNextTokenString( ParserMessageList &rErrorList, sal_uInt16 &rTagStartPos )
743 {
744 //  sal_uInt16 nStyle1StartPos = aSource.SearchAscii( "<#", nPos );
745     sal_uInt16 nStyle2StartPos = aSource.SearchAscii( "$[", nPos );
746     sal_uInt16 nStyle3StartPos = aSource.SearchAscii( "\\<", nPos );
747     sal_uInt16 nStyle4StartPos = aSource.SearchAscii( "\\\\", nPos );    // this is only to kick out quoted backslashes
748 
749     rTagStartPos = 0;
750 
751 /* removing since a \<... is not likely
752     // check if the tag starts with a letter to avoid things like <> <= ... >
753     while ( STRING_NOTFOUND != nStyle3StartPos && !( aSource.Copy( nStyle3StartPos+2, 1 ).IsAlphaAscii() || aSource.GetChar( nStyle3StartPos+2 ) == '/' ) )
754         nStyle3StartPos = aSource.SearchAscii( "\\<", nStyle3StartPos+1 );
755 */
756     if ( STRING_NOTFOUND == nStyle2StartPos && STRING_NOTFOUND == nStyle3StartPos )
757         return String();  // no more tokens
758 
759     if ( nStyle4StartPos < nStyle2StartPos && nStyle4StartPos <= nStyle3StartPos )  // <= to make sure \\ is always handled first
760     {   // Skip quoted Backslash
761         nPos = nStyle4StartPos +2;
762         return GetNextTokenString( rErrorList, rTagStartPos );
763     }
764 
765 /*  if ( nStyle1StartPos < nStyle2StartPos && nStyle1StartPos <= nStyle3StartPos )  // <= to make sure our spechial tags are recognized before all others
766     {   // test for <# ... > style tokens
767         sal_uInt16 nEndPos = aSource.SearchAscii( ">", nStyle1StartPos );
768         if ( nEndPos == STRING_NOTFOUND )
769         {   // Token is incomplete. Skip start and search for better ones
770             nPos = nStyle1StartPos +2;
771             return GetNextTokenString( rErrorList, rTagStartPos );
772         }
773         nPos = nEndPos;
774         rTagStartPos = nStyle1StartPos;
775         return aSource.Copy( nStyle1StartPos, nEndPos-nStyle1StartPos +1 ).ToUpperAscii();
776     }
777     else*/ if ( nStyle2StartPos < nStyle3StartPos )
778     {   // test for $[ ... ] style tokens
779         sal_uInt16 nEndPos = aSource.SearchAscii( "]", nStyle2StartPos);
780         if ( nEndPos == STRING_NOTFOUND )
781         {   // Token is incomplete. Skip start and search for better ones
782             nPos = nStyle2StartPos +2;
783             return GetNextTokenString( rErrorList, rTagStartPos );
784         }
785         nPos = nEndPos;
786         rTagStartPos = nStyle2StartPos;
787         return aSource.Copy( nStyle2StartPos, nEndPos-nStyle2StartPos +1 );
788     }
789     else
790     {   // test for \< ... \> style tokens
791         sal_uInt16 nEndPos = aSource.SearchAscii( "\\>", nStyle3StartPos);
792         sal_uInt16 nQuotedBackPos = aSource.SearchAscii( "\\\\", nStyle3StartPos );    // this is only to kick out quoted backslashes
793         while ( nQuotedBackPos <= nEndPos && nQuotedBackPos != STRING_NOTFOUND )
794         {
795             nEndPos = aSource.SearchAscii( "\\>", nQuotedBackPos +2);
796             nQuotedBackPos = aSource.SearchAscii( "\\\\", nQuotedBackPos +2 );    // this is only to kick out quoted backslashes
797         }
798         if ( nEndPos == STRING_NOTFOUND )
799         {   // Token is incomplete. Skip start and search for better ones
800             nPos = nStyle3StartPos +2;
801             ByteString sTmp( "Tag Start '\\<' without Tag End '\\>': " );
802             rErrorList.AddError( 24, "Tag Start '\\<' without Tag End '\\>'", TokenInfo( TAG_UNKNOWN_TAG, nStyle3StartPos, aSource.Copy( nStyle3StartPos-10, 20 ) ) );
803             return GetNextTokenString( rErrorList, rTagStartPos );
804         }
805         // check for paired quoted "    -->   \"sometext\"
806 
807         nPos = nEndPos;
808         rTagStartPos = nStyle3StartPos;
809         return aSource.Copy( nStyle3StartPos, nEndPos-nStyle3StartPos +2 );
810     }
811 }
812 
813 String SimpleParser::GetLexem( TokenInfo const &aToken )
814 {
815     if ( aToken.aTokenString.Len() )
816         return aToken.aTokenString;
817     else
818     {
819         sal_uInt16 i = 0;
820         while ( aKnownTags[i].nTag != TAG_UNKNOWN_TAG &&
821             aKnownTags[i].nTag != aToken.nId )
822             i++;
823 
824         return aKnownTags[i].GetName();
825     }
826 }
827 
828 TokenParser::TokenParser()
829 : pErrorList( NULL )
830 {}
831 
832 void TokenParser::Parse( const String &aCode, ParserMessageList* pList )
833 {
834     pErrorList = pList;
835 
836     //Scanner initialisieren
837     aParser.Parse( aCode );
838 
839     //erstes Symbol holen
840     aTag = aParser.GetNextToken( *pErrorList );
841 
842     nPfCaseOptions = 0;
843     nAppCaseOptions = 0;
844     bPfCaseActive = sal_False;
845     bAppCaseActive = sal_False;
846 
847     nActiveRefTypes = 0;
848 
849     //Ausfuehren der Start-Produktion
850     Paragraph();
851 
852     //Es wurde nicht die ganze Kette abgearbeitet, bisher ist aber
853     //kein Fehler aufgetreten
854     //=> es wurde ein einleitendes Tag vergessen
855     if ( aTag.nId != TAG_NOMORETAGS )
856     {
857         switch ( aTag.nId )
858         {
859             case TAG_END:
860                 {
861                     ParseError( 3, "Extra Tag <#END>. Switch or <#HREF> expected.", aTag );
862                 }
863                 break;
864             case TAG_BOLDOFF:
865                 {
866                     ParseError( 4, "<#BOLD> expected before <#/BOLD>.", aTag );
867                 }
868                 break;
869             case TAG_ITALICOFF:
870                 {
871                     ParseError( 5, "<#ITALIC> expected before <#/ITALIC>.", aTag );
872                 }
873                 break;
874             case TAG_UNDERLINEOFF:
875                 {
876                     ParseError( 17, "<#UNDER> expected before <#/UNDER>.", aTag );
877                 }
878                 break;
879 /*          case TAG_MISSPARENTHESIS:
880                 {
881                     ParseError( 14, "missing closing parenthesis '>'", aTag );
882                 }
883                 break;*/
884             case TAG_AEND:
885                 {
886                     ParseError( 5, "Extra Tag <#AEND>. <#AVIS> or <#AHID> expected.", aTag );
887                 }
888                 break;
889             case TAG_ELSE:
890                 {
891                     ParseError( 16, "Application-tag or platform-tag expected before <#ELSE>.", aTag );
892                 }
893                 break;
894             case TAG_UNKNOWN_TAG:
895                 {
896                     ParseError( 6, "unknown Tag", aTag );
897                 }
898                 break;
899             default:
900                 {
901                     ParseError( 6, "unexpected Tag", aTag );
902                 }
903         }
904     }
905     pErrorList = NULL;
906 }
907 
908 void TokenParser::Paragraph()
909 {
910     switch ( aTag.nId )
911     {
912         case TAG_GRAPHIC:
913         case TAG_NEXTVERSION:
914             {
915                 TagRef();
916                 Paragraph();
917             }
918             break;
919         case TAG_AVIS:
920         case TAG_AHID:
921             {
922                 TagRef();
923                 Paragraph();
924             }
925             break;
926         case TAG_HELPID:
927             {
928                 SimpleTag();
929                 Paragraph();
930             }
931             break;
932         case TAG_OFFICEFULLNAME:
933         case TAG_OFFICENAME:
934         case TAG_OFFICEPATH:
935         case TAG_OFFICEVERSION:
936         case TAG_PORTALNAME:
937         case TAG_PORTALFULLNAME:
938         case TAG_PORTALPATH:
939         case TAG_PORTALVERSION:
940         case TAG_PORTALSHORTNAME:
941             {
942                 SimpleTag();
943                 Paragraph();
944             }
945             break;
946         case TAG_REFINSERT:
947             {
948                 SimpleTag();
949                 Paragraph();
950             }
951             break;
952         case TAG_BOLDON:
953         case TAG_ITALICON:
954         case TAG_UNDERLINEON:
955         case TAG_COMMONSTART:
956             {
957                 TagPair();
958                 Paragraph();
959             }
960             break;
961         case TAG_HREF:
962         case TAG_NAME:
963         case TAG_KEY:
964         case TAG_INDEX:
965         case TAG_TITEL:
966         case TAG_REFSTART:
967             {
968                 TagRef();
969                 Paragraph();
970             }
971             break;
972         case TAG_OS2:
973         case TAG_WIN:
974         case TAG_UNIX:
975         case TAG_MAC: //...
976             {
977                 if ( ! bPfCaseActive )
978                 {
979                     //PfCases duerfen nicht verschachtelt sein:
980                     bPfCaseActive = sal_True;
981                     PfCase();
982 
983                     //So jetzt kann wieder ein PfCase kommen:
984                     bPfCaseActive = sal_False;
985                     Paragraph();
986                 }
987             }
988             break;
989         case TAG_WRITER:
990         case TAG_CALC:
991         case TAG_DRAW:
992         case TAG_IMPRESS:
993         case TAG_SCHEDULE:
994         case TAG_IMAGE:
995         case TAG_MATH:
996         case TAG_CHART:
997         case TAG_OFFICE:
998             {
999                 if ( !bAppCaseActive )
1000                 {
1001                     //AppCases duerfen nicht verschachtelt sein:
1002                     bAppCaseActive = sal_True;
1003                     AppCase();
1004 
1005                     //jetzt koennen wieder AppCases kommen:
1006                     bAppCaseActive = sal_False;
1007                     Paragraph();
1008                 }
1009             }
1010             break;
1011 
1012         //Case TAG_BOLDOFF, TAG_ITALICOFF, TAG_BUNDERLINE, TAG_END
1013             //nichts tun wg. epsilon-Prod.
1014     }
1015 }
1016 
1017 void TokenParser::PfCase()
1018 {
1019 
1020     //Produktion:
1021     //PfCase -> PfCaseBegin Paragraph (PfCase | PfCaseEnd)
1022 
1023     PfCaseBegin();
1024 
1025     //Jetzt ist eine PfCase-Produktion aktiv:
1026     Paragraph();
1027     switch ( aTag.nId )
1028     {
1029         case TAG_ELSE:
1030         case TAG_END:
1031             {
1032                 CaseEnd();
1033             }
1034             break;
1035         case TAG_OS2:
1036         case TAG_WIN:
1037         case TAG_UNIX:
1038         case TAG_MAC: //First (PfBegin)
1039             {
1040                 PfCase();
1041             }
1042             break;
1043         default:
1044             ParseError( 8, "<#ELSE> or <#END> or platform-tag expected.", aTag );
1045     }
1046     //Die gemerkten Tags wieder loeschen fuer naechstes PfCase:
1047     nPfCaseOptions = 0;
1048 }
1049 
1050 void TokenParser::PfCaseBegin()
1051 {
1052     switch ( aTag.nId )
1053     {
1054         case TAG_OS2:
1055         case TAG_WIN:
1056         case TAG_UNIX:
1057         case TAG_MAC:
1058             {
1059                 //Token darf noch nicht vorgekommen sein im
1060                 //aktuellen Plattform-Case:
1061                 if ( !HAS_FLAG( nPfCaseOptions, TAG_NOGROUP( aTag.nId ) ) )
1062                 {
1063                     SET_FLAG( nPfCaseOptions, TAG_NOGROUP( aTag.nId ) );
1064                     match( aTag, aTag );
1065                 }
1066                 else {
1067                     ParseError( 9, "Tag defined twice in the same platform-case", aTag );
1068                 }
1069             }
1070     }
1071 }
1072 
1073 void TokenParser::AppCase()
1074 {
1075 
1076     //Produktion:
1077     //AppCase -> AppCaseBegin Paragraph (AppCase | AppCaseEnd)
1078 
1079 
1080     AppCaseBegin();
1081 
1082     Paragraph();
1083 
1084     switch ( aTag.nId )
1085     {
1086         case TAG_ELSE:
1087         case TAG_END:
1088             {
1089                 CaseEnd();
1090             }
1091             break;
1092         case TAG_WRITER:
1093         case TAG_DRAW:
1094         case TAG_CALC:
1095         case TAG_IMAGE:
1096         case TAG_MATH:
1097         case TAG_CHART:
1098         case TAG_OFFICE:
1099         case TAG_IMPRESS:
1100         case TAG_SCHEDULE: //First (AppBegin)
1101             {
1102                 AppCase();
1103             }
1104             break;
1105         default:
1106             ParseError( 1, "<#ELSE> or <#END> or application-case-tag expected.", aTag );
1107         }
1108 
1109     //Die gemerkten Tags wieder loeschen fuer naechstes AppCase:
1110     nAppCaseOptions = 0;
1111 }
1112 
1113 void TokenParser::AppCaseBegin()
1114 {
1115     switch ( aTag.nId )
1116     {
1117         case TAG_WRITER:
1118         case TAG_DRAW:
1119         case TAG_CALC:
1120         case TAG_IMAGE:
1121         case TAG_MATH:
1122         case TAG_CHART:
1123         case TAG_OFFICE:
1124         case TAG_IMPRESS:
1125         case TAG_SCHEDULE:
1126             {
1127                 //Token darf noch nicht vorgekommen sein im
1128                 //aktuellen Plattform-Case:
1129                 if ( !HAS_FLAG( nAppCaseOptions, TAG_NOGROUP( aTag.nId ) ) )
1130                 {
1131                     SET_FLAG( nAppCaseOptions, TAG_NOGROUP( aTag.nId ) );
1132                     match( aTag, aTag );
1133                 }
1134                 else {
1135                     ParseError( 13, "Tag defined twice in the same application-case.", aTag );
1136                 }
1137             }
1138     }
1139 }
1140 
1141 void TokenParser::CaseEnd()
1142 {
1143     //Produktion:
1144     //CaseEnd -> <#ELSE> Paragraph <#END> | <#END>
1145 
1146     switch ( aTag.nId )
1147     {
1148         case TAG_ELSE:
1149         {
1150             match( aTag, TAG_ELSE );
1151             Paragraph();
1152             match( aTag, TAG_END );
1153         }
1154         break;
1155         case TAG_END:
1156         {
1157             match( aTag, TAG_END );
1158         }
1159         break;
1160         default:
1161             ParseError( 2, "<#ELSE> or <#END> expected.", aTag );
1162     }
1163 }
1164 
1165 void TokenParser::SimpleTag()
1166 {
1167 
1168     switch ( aTag.nId )
1169     {
1170         case TAG_HELPID:
1171             {
1172                 match( aTag, TAG_HELPID );
1173             }
1174             break;
1175         case TAG_OFFICEFULLNAME:
1176         case TAG_OFFICENAME:
1177         case TAG_OFFICEPATH:
1178         case TAG_OFFICEVERSION:
1179         case TAG_PORTALNAME:
1180         case TAG_PORTALFULLNAME:
1181         case TAG_PORTALPATH:
1182         case TAG_PORTALVERSION:
1183         case TAG_PORTALSHORTNAME:
1184 
1185         case TAG_REFINSERT:
1186             {
1187                 match( aTag, aTag );
1188             }
1189             break;
1190         default:
1191             ParseError( 15, "[<#SimpleTag>] expected.", aTag );
1192     }
1193 }
1194 
1195 void TokenParser::TagPair()
1196 {
1197     switch ( aTag.nId )
1198     {
1199         case TAG_BOLDON:
1200             {
1201                 match( aTag, TAG_BOLDON );
1202                 Paragraph();
1203                 match( aTag, TAG_BOLDOFF );
1204             }
1205             break;
1206         case TAG_ITALICON:
1207             {
1208                 match( aTag, TAG_ITALICON );
1209                 Paragraph();
1210                 match( aTag, TAG_ITALICOFF );
1211             }
1212             break;
1213         case TAG_UNDERLINEON:
1214             {
1215                 match( aTag, TAG_UNDERLINEON );
1216                 Paragraph();
1217                 match( aTag, TAG_UNDERLINEOFF );
1218             }
1219             break;
1220         case TAG_COMMONSTART:
1221             {
1222                 //remember tag so we can give the original tag in case of an error
1223                 TokenInfo aEndTag( aTag );
1224                 aEndTag.nId = TAG_COMMONEND;
1225                 match( aTag, TAG_COMMONSTART );
1226                 Paragraph();
1227                 match( aTag, aEndTag );
1228             }
1229             break;
1230         default:
1231             ParseError( 10, "<#BOLD>, <#ITALIC>, <#UNDER> expected.", aTag );
1232     }
1233 }
1234 
1235 
1236 void TokenParser::TagRef()
1237 {
1238     switch ( aTag.nId )
1239     {
1240         case TAG_GRAPHIC:
1241         case TAG_NEXTVERSION:
1242             {
1243                 if ( !HAS_FLAG( nActiveRefTypes, TAG_NOGROUP( aTag.nId ) ) )
1244                 {
1245                     TokenId aThisToken = aTag.nId;
1246                     SET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1247                     match( aTag, aTag );
1248                     Paragraph();
1249                     if ( aThisToken == TAG_GRAPHIC )
1250                         match( aTag, TAG_ENDGRAPHIC );
1251                     else
1252                         match( aTag, TAG_VERSIONEND );
1253                     // don't reset since alowed only once per paragraph
1254                     // RESET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1255                 }
1256                 else
1257                 {
1258                     ParseError( 11, "Tags <#GRAPHIC>,<#NEXTVERSION> allowed only once per paragraph at", aTag );
1259                 }
1260             }
1261             break;
1262         case TAG_AVIS:
1263         case TAG_AHID:
1264             {
1265                 if ( !HAS_FLAG( nActiveRefTypes, TAG_NOGROUP( aTag.nId ) ) )
1266                 {
1267                     TokenId aThisToken = aTag.nId;
1268                     SET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1269                     match( aTag, aTag );
1270                     Paragraph();
1271                     match( aTag, TAG_AEND );
1272                     RESET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1273                 }
1274                 else
1275                 {
1276                     ParseError( 11, "Nested <#AHID>,<#AVIS> not allowed.", aTag );
1277                 }
1278             }
1279             break;
1280         case TAG_HREF:
1281         case TAG_NAME:
1282             {
1283 
1284             }
1285             // NOBREAK
1286         case TAG_KEY:
1287         case TAG_INDEX:
1288         case TAG_TITEL:
1289         case TAG_REFSTART:
1290             {
1291                 if ( !HAS_FLAG( nActiveRefTypes, TAG_NOGROUP( aTag.nId ) ) )
1292                 {
1293                     TokenId aThisToken = aTag.nId;
1294                     match( aTag, aTag );
1295                     if ( aThisToken != TAG_NAME )
1296                     {   // TAG_NAME has no TAG_END
1297                         SET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1298                         Paragraph();
1299                         match( aTag, TAG_END );
1300                         RESET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1301                     }
1302                 }
1303                 else
1304                 {
1305                     ParseError( 11, "Nested <#HREF>,<#NAME> or <#KEY> not allowed.", aTag );
1306                 }
1307             }
1308             break;
1309         default:
1310             ParseError( 12, "<#HREF>,<#NAME> or <#KEY> expected.", aTag );
1311     }
1312 }
1313 
1314 sal_Bool TokenParser::match( const TokenInfo &aCurrentToken, const TokenId &aExpectedToken )
1315 {
1316     return match( aCurrentToken, TokenInfo( aExpectedToken, TOK_INVALIDPOS ) );
1317 }
1318 
1319 sal_Bool TokenParser::match( const TokenInfo &aCurrentToken, const TokenInfo &rExpectedToken )
1320 {
1321     TokenInfo aExpectedToken( rExpectedToken );
1322     if ( aCurrentToken.nId == aExpectedToken.nId )
1323     {
1324         if ( ( aCurrentToken.nId == TAG_COMMONEND
1325                && aCurrentToken.GetTagName().Equals( aExpectedToken.GetTagName() ) )
1326              || aCurrentToken.nId != TAG_COMMONEND )
1327         {
1328             aTag = aParser.GetNextToken( *pErrorList );
1329             return sal_True;
1330         }
1331     }
1332 
1333     if ( aExpectedToken.nId == TAG_COMMONEND )
1334     {
1335         aExpectedToken.aTokenString.Insert( String::CreateFromAscii( "Close tag for " ), 0 );
1336     }
1337 
1338     ByteString sTmp( "Expected Symbol" );
1339     if ( aCurrentToken.nId == TAG_NOMORETAGS )
1340     {
1341         ParseError( 7, sTmp, aExpectedToken );
1342     }
1343     else
1344     {
1345         sTmp += ": ";
1346         sTmp += ByteString( aParser.GetLexem( aExpectedToken ), RTL_TEXTENCODING_UTF8 );
1347         sTmp += " near ";
1348         ParseError( 7, sTmp, aCurrentToken );
1349     }
1350     return sal_False;
1351 }
1352 
1353 void TokenParser::ParseError( sal_uInt16 nErrNr, ByteString aErrMsg, const TokenInfo &rTag )
1354 {
1355     pErrorList->AddError( nErrNr, aErrMsg, rTag);
1356 
1357     // Das Fehlerhafte Tag ueberspringen
1358     aTag = aParser.GetNextToken( *pErrorList );
1359 }
1360 
1361 
1362 ParserMessage::ParserMessage( sal_uInt16 PnErrorNr, ByteString PaErrorText, const TokenInfo &rTag )
1363         : nErrorNr( PnErrorNr )
1364         , aErrorText( PaErrorText )
1365         , nTagBegin( 0 )
1366         , nTagLength( 0 )
1367 {
1368     String aLexem( SimpleParser::GetLexem( rTag ) );
1369     aErrorText.Append(": ");
1370     aErrorText += ByteString( aLexem, RTL_TEXTENCODING_UTF8 );
1371     if ( rTag.nId == TAG_NOMORETAGS )
1372         aErrorText.Append(" at end of line ");
1373     else if ( rTag.nPos != TOK_INVALIDPOS )
1374     {
1375         aErrorText.Append(" at Position ");
1376         aErrorText.Append( ByteString::CreateFromInt32( rTag.nPos ) );
1377     }
1378     nTagBegin = rTag.nPos;
1379     nTagLength = aLexem.Len();
1380 }
1381 
1382 ParserError::ParserError( sal_uInt16 ErrorNr, ByteString ErrorText, const TokenInfo &rTag )
1383 : ParserMessage( ErrorNr, ErrorText, rTag )
1384 {}
1385 
1386 ParserWarning::ParserWarning( sal_uInt16 ErrorNr, ByteString ErrorText, const TokenInfo &rTag )
1387 : ParserMessage( ErrorNr, ErrorText, rTag )
1388 {}
1389 
1390 sal_Bool LingTest::IsTagMandatory( TokenInfo const &aToken, TokenId &aMetaTokens )
1391 {
1392     TokenId aTokenId = aToken.nId;
1393     TokenId aTokenGroup = TAG_GROUP( aTokenId );
1394     if ( TAG_GROUP_PROGSWITCH == aTokenGroup
1395         || TAG_REFINSERT == aTokenId
1396         || TAG_REFSTART == aTokenId
1397         || TAG_NAME == aTokenId
1398         || TAG_HREF == aTokenId
1399         || TAG_AVIS == aTokenId
1400         || TAG_AHID == aTokenId
1401         || TAG_GRAPHIC == aTokenId
1402         || TAG_NEXTVERSION == aTokenId
1403         || ( TAG_GROUP_META == aTokenGroup && (aMetaTokens & aTokenId) == aTokenId ) )
1404     {
1405         if ( TAG_GROUP_META == aTokenGroup )
1406             aMetaTokens |= aTokenId;
1407         return sal_True;
1408     }
1409     else if (   TAG_COMMONSTART == aTokenId
1410              || TAG_COMMONEND == aTokenId )
1411     {
1412         String aTagName = aToken.GetTagName();
1413         return !(aTagName.EqualsIgnoreCaseAscii( "comment" )
1414               || aTagName.EqualsIgnoreCaseAscii( "bookmark_value" )
1415               || aTagName.EqualsIgnoreCaseAscii( "emph" )
1416               || aTagName.EqualsIgnoreCaseAscii( "item" )
1417               || aTagName.EqualsIgnoreCaseAscii( "br" ) );
1418     }
1419     return sal_False;
1420 }
1421 
1422 void LingTest::CheckTags( TokenList &aReference, TokenList &aTestee, sal_Bool bFixTags )
1423 {
1424     sal_uLong i=0,j=0;
1425     // Clean old Warnings
1426     while ( aCompareWarningList.Count() )
1427     {
1428         delete aCompareWarningList.GetCurObject();
1429         aCompareWarningList.Remove();
1430     }
1431 
1432     /* in xml tags, do not require the following tags
1433         comment
1434         bookmark_value
1435         emph
1436         item
1437         br
1438     */
1439 
1440     // filter uninteresting Tags
1441     TokenId aMetaTokens = 0;
1442     for ( i=0 ; i < aReference.Count() ; i++ )
1443     {
1444         if ( !IsTagMandatory( aReference.GetObject( i ), aMetaTokens ) )
1445             aReference.GetObject( i ).SetDone();
1446     }
1447 
1448     aMetaTokens = 0;
1449     for ( i=0 ; i < aTestee.Count() ; i++ )
1450     {
1451         if ( !IsTagMandatory( aTestee.GetObject( i ), aMetaTokens ) )
1452             aTestee.GetObject( i ).SetDone();
1453     }
1454 
1455     // remove all matching tags
1456     for ( i=0 ; i < aReference.Count() ; i++ )
1457     {
1458         if ( aReference.GetObject( i ).IsDone() )
1459             continue;
1460 
1461         sal_Bool bTagFound = sal_False;
1462         for ( j=0 ; j < aTestee.Count() && !bTagFound ; j++ )
1463         {
1464             if ( aTestee.GetObject( j ).IsDone() )
1465                 continue;
1466 
1467             if ( aReference.GetObject( i ).MatchesTranslation( aTestee.GetObject( j ), sal_False, aCompareWarningList ) )
1468             {
1469                 aReference.GetObject( i ).SetDone();
1470                 aTestee.GetObject( j ).SetDone();
1471                 bTagFound = sal_True;
1472             }
1473         }
1474     }
1475 
1476     sal_Bool bCanFix = sal_True;
1477 
1478     if ( bFixTags )
1479     {
1480         // we fix only if its a really simple case
1481         sal_uInt16 nTagCount = 0;
1482         for ( i=0 ; i < aReference.Count() ; i++ )
1483             if ( !aReference.GetObject( i ).IsDone() )
1484                 nTagCount++;
1485         if ( nTagCount > 1 )
1486             bCanFix = sal_False;
1487 
1488         nTagCount = 0;
1489         for ( i=0 ; i < aTestee.Count() ; i++ )
1490             if ( !aTestee.GetObject( i ).IsDone() )
1491                 nTagCount++;
1492         if ( nTagCount > 1 )
1493             bCanFix = sal_False;
1494     }
1495 
1496     // generate errors for tags that have differing attributes
1497     for ( i=0 ; i < aReference.Count() ; i++ )
1498     {
1499         if ( aReference.GetObject( i ).IsDone() )
1500             continue;
1501 
1502         sal_Bool bTagFound = sal_False;
1503         for ( j=0 ; j < aTestee.Count() && !bTagFound ; j++ )
1504         {
1505             if ( aTestee.GetObject( j ).IsDone() )
1506                 continue;
1507 
1508             if ( aReference.GetObject( i ).MatchesTranslation( aTestee.GetObject( j ), sal_True, aCompareWarningList, bCanFix && bFixTags ) )
1509             {
1510                 aReference.GetObject( i ).SetDone();
1511                 aTestee.GetObject( j ).SetDone();
1512                 bTagFound = sal_True;
1513             }
1514         }
1515     }
1516 
1517     // list remaining tags as errors
1518     for ( i=0 ; i < aReference.Count() ; i++ )
1519     {
1520         if ( aReference.GetObject( i ).IsDone() )
1521             continue;
1522 
1523         aCompareWarningList.AddError( 20, "Missing Tag in Translation", aReference.GetObject( i ) );
1524     }
1525     for ( i=0 ; i < aTestee.Count() ; i++ )
1526     {
1527         if ( aTestee.GetObject( i ).IsDone() )
1528             continue;
1529 
1530         aCompareWarningList.AddError( 21, "Extra Tag in Translation", aTestee.GetObject( i ) );
1531     }
1532 
1533     for ( i=0 ; i < aReference.Count() ; i++ )
1534         aReference.GetObject( i ).SetDone( sal_False );
1535 
1536     for ( i=0 ; i < aTestee.Count() ; i++ )
1537         aTestee.GetObject( i ).SetDone( sal_False );
1538 }
1539 
1540 void LingTest::CheckReference( GSILine *aReference )
1541 {
1542     aReferenceParser.Parse( aReference->GetUText(), aReference->GetMessageList() );
1543 }
1544 
1545 void LingTest::CheckTestee( GSILine *aTestee, sal_Bool bHasSourceLine, sal_Bool bFixTags )
1546 {
1547     aFixedTestee = aTestee->GetUText();
1548     aTesteeParser.Parse( aFixedTestee, aTestee->GetMessageList() );
1549 
1550     if ( bHasSourceLine )
1551         CheckTags( aReferenceParser.GetTokenList(), aTesteeParser.GetTokenList(), bFixTags );
1552 
1553     if ( bFixTags )
1554     {
1555         TokenList& aTesteeTokens = aTesteeParser.GetTokenList();
1556         sal_Bool bFixesDone = sal_False;
1557         // count backwards to allow replacing from right to left
1558         int i;
1559         for ( i=aTesteeTokens.Count()-1 ; i>=0 ; i-- )
1560         {
1561             if ( aTesteeTokens.GetObject( i ).HasBeenFixed() )
1562             {
1563                 bFixesDone = sal_True;
1564                 aFixedTestee.Replace( aTesteeTokens.GetObject( i ).nPos, aTesteeTokens.GetObject( i ).aTokenString.Len(), aTesteeTokens.GetObject( i ).MakeTag() );
1565             }
1566         }
1567         if ( bFixesDone )
1568         {
1569             aTestee->SetUText( aFixedTestee );
1570             aTestee->SetFixed();
1571         }
1572     }
1573 }
1574 
1575