/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/

/* compile with flex++ -8 -f -+ -Sflex.skl -ortfparser.cxx rtfparser.lex */
%option yylineno
%{
//#include <io.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <osl/file.h>
#include <assert.h>
#include <vector>

#if defined (UNX)
#define stricmp strcasecmp
#endif

/* Factory for the scanner: wraps the given input source and event handler
   in a new yyFlexLexer. The object is allocated with new and returned as a
   raw pointer. */
writerfilter::rtftok::RTFScanner* writerfilter::rtftok::RTFScanner::createRTFScanner(class writerfilter::rtftok::RTFInputSource& inputSource, writerfilter::rtftok::RTFScannerHandler &eventHandler)
{
  return new yyFlexLexer(&inputSource, eventHandler);
}



extern "C" {
//int isatty(int fd) { return 0; }
/* Returning 1 tells the scanner there is no further input after EOF. */
int yywrap(void) { return 1; }
}

/*
oslFileHandle yy_osl_in=NULL;
#define YY_INPUT(buf,result,max_size) \
{\
    {\
        assert(yy_osl_in!=NULL);\
        sal_Bool isEOF;\
        oslFileError ret=osl_isEndOfFile( yy_osl_in, &isEOF );\
        assert(ret==osl_File_E_None);\
        if (isEOF)\
        {\
            result=YY_NULL;\
        }\
        else\
        {\
            sal_uInt64 bytesRead;\
            ret=osl_readFile( yy_osl_in, buf, max_size, &bytesRead);\
            assert(ret==osl_File_E_None);\
            result = bytesRead; \
        }\
    }\
}
*/

/* Size in bytes (terminator included) of the token and value buffers that
   the raise_* helpers hand to split_ctrl. split_ctrl never writes more than
   this many bytes into either buffer. */
static const size_t _ctrl_buf_size = 50;

//extern RtfTokenizer* this;
/* Splits a matched control-word lexeme into its name and its parameter.
 *
 *   _yytext  NUL-terminated lexeme as matched by one of the rules below
 *            (it may start with a brace, line breaks, a star prefix and
 *            backslashes).
 *   token    receives the run of characters >= 'A' that follows the leading
 *            non-letter characters (the control word name).
 *   value    receives the following run of characters > ' ' (optional sign
 *            and digits for the lexemes produced by the rules below).
 *
 * Both outputs are always NUL-terminated. Each must provide room for
 * _ctrl_buf_size bytes; longer names or parameters are truncated, because the
 * rules accept letter and digit runs of unlimited length while the callers
 * pass fixed-size stack buffers.
 */
void yyFlexLexer::split_ctrl(char *_yytext, char* token, char *value)
{
  const size_t maxLen = _ctrl_buf_size - 1; // leave room for the terminator
  size_t tokenLen = 0;
  size_t valueLen = 0;
  int i=0; // skip first '\'

  while(_yytext[i]!=0 && (_yytext[i]=='\r' || _yytext[i]=='\n')) i++;
  // skip everything that is not an ASCII letter (brace, line breaks, star, backslash)
  while(_yytext[i]!=0 && (_yytext[i]<'A' || (_yytext[i]>'Z' && _yytext[i]<'a') || _yytext[i]>'z')) i++;

  // control word name; excess characters are consumed but not stored
  while(_yytext[i]!=0 && _yytext[i]>='A')
  {
    if (tokenLen < maxLen) token[tokenLen++]=_yytext[i];
    i++;
  }
  token[tokenLen]=0;

  // parameter; stops at the optional delimiting space or at the end
  while(_yytext[i]!=0 && _yytext[i]>' ')
  {
    if (valueLen < maxLen) value[valueLen++]=_yytext[i];
    i++;
  }
  value[valueLen]=0;
}

/* Reports the lexeme to the handler as a control word (name, parameter). */
void yyFlexLexer::raise_ctrl(char* _yytext)
{
  char token[_ctrl_buf_size];
  char value[_ctrl_buf_size];
  split_ctrl(_yytext, token, value);
  eventHandler.ctrl(token, value);
}

/* Reports the lexeme to the handler as a destination (name, parameter). */
void yyFlexLexer::raise_dest(char* _yytext)
{
  char token[_ctrl_buf_size];
  char value[_ctrl_buf_size];
  split_ctrl(_yytext, token, value);
  eventHandler.dest(token, value);
}

/* Control words that raise_destOrCtrl reports as destinations.
   The table is searched with bsearch() using a case-insensitive comparison,
   so it MUST stay sorted in case-insensitive ascending order.
   NOTE(review): "footer" and "header" are each listed twice; this is harmless
   for the lookup, but the second entries may have been meant to be "footerr"
   and "headerr" (which would have to go after "footerl" / "headerl") -
   confirm before changing. */
#define _num_of_destctrls (sizeof(_destctrls)/sizeof(_destctrls[0]))
static const char* _destctrls[] = {
  "aftncn",
  "aftnsep",
  "aftnsepc",
  "annotation",
  "atnauthor",
  "atndate",
  "atnicn",
  "atnid",
  "atnparent",
  "atnref",
  "atntime",
  "atrfend",
  "atrfstart",
  "author",
  "background",
  "bkmkend",
  "bkmkstart",
  "buptim",
  "category",
  "colortbl",
  "comment",
  "company",
  "creatim",
  "datafield",
  "do",
  "doccomm",
  "docvar",
  "dptxbxtext",
  "falt",
  "fchars",
  "ffdeftext",
  "ffentrymcr",
  "ffexitmcr",
  "ffformat",
  "ffhelptext",
  "ffl",
  "ffname",
  "ffstattext",
  "field",
  "file",
  "filetbl",
  "fldinst",
  "fldrslt",
  "fldtype",
  "fname",
  "fontemb",
  "fontfile",
  "fonttbl",
  "footer",
  "footer",
  "footerf",
  "footerl",
  "footnote",
  "formfield",
  "ftncn",
  "ftnsep",
  "ftnsepc",
  "g",
  "generator",
  "gridtbl",
  "header",
  "header",
  "headerf",
  "headerl",
  "htmltag",
  "info",
  "keycode",
  "keywords",
  "lchars",
  "levelnumbers",
  "leveltext",
  "lfolevel",
  "list",
  "listlevel",
  "listname",
  "listoverride",
  "listoverridetable",
  "listtable",
  "listtext",
  "manager",
  "mhtmltag",
  "nesttableprops",
  "nextfile",
  "nonesttables",
  "nonshppict",
  "objalias",
  "objclass",
  "objdata",
  "object",
  "objname",
  "objsect",
  "objtime",
  "oldcprops",
  "oldpprops",
  "oldsprops",
  "oldtprops",
  "operator",
  "panose",
  "pgp",
  "pgptbl",
  "picprop",
  "pict",
  "pn",
  "pnseclvl",
  "pntext",
  "pntxta",
  "pntxtb",
  "printim",
  "private",
  "pwd",
  "pxe",
  "result",
  "revtbl",
  "revtim",
  "rsidtbl",
  "rtf",
  "rxe",
  "shp",
  "shpgrp",
  "shpinst",
  "shppict",
  "shprslt",
  "shptxt",
  "sn",
  "sp",
  "stylesheet",
  "subject",
  "sv",
  "tc",
  "template",
  "title",
  "txe",
  "ud",
  "upr",
  "urtf",
  "userprops",
  "xe"
};

/* bsearch() comparator for _destctrls: the key is the token itself, while
   the element argument points INTO the array, i.e. at a const char* entry,
   and has to be dereferenced before the strings can be compared. */
static int _compare_destctrl(const void* key, const void* element)
{
  const char* name = static_cast<const char*>(key);
  const char* entry = *static_cast<const char* const*>(element);
  return stricmp(name, entry);
}

/* Handles a group-opening brace that is directly followed by a control word.
   If the word is listed in _destctrls it is reported as a destination;
   otherwise the brace and the control word are reported separately. */
void yyFlexLexer::raise_destOrCtrl(char* _yytext)
{
  char token[_ctrl_buf_size];
  char value[_ctrl_buf_size];
  split_ctrl(_yytext, token, value);
  // element size is the size of one table entry (a pointer), not a string length
  const void* result=bsearch(token, _destctrls, _num_of_destctrls, sizeof(_destctrls[0]), _compare_destctrl);
  if (result)
  {
    eventHandler.dest(token, value);
  }
  else
  {
    eventHandler.lbrace();
    eventHandler.ctrl(token, value);
  }
}

%}

%%
\{\\upr\{" "? { /* skip upr destination */
    int c;
    int br=1;
    /* consume input until the group opened last is closed again or input ends */
    while (br>0 && (c = yyinput()) != EOF)
    {
        if (c=='}') br--;
        if (c=='{') br++;
    }
    eventHandler.lbrace();
    num_chars+=yyleng;
}


\\bin(("+"|"-")?[0-9]*)?" "? { /* binary data: the parameter is the number of raw bytes that follow */
    raise_dest(yytext);
    num_chars+=yyleng;
    /* yytext+4 skips the four characters of the keyword itself */
    int len=atoi(yytext+4);
    //  pictureBytes=2*len;
    /* a missing, zero or negative count carries no payload; stop early if
       the input ends before the announced number of bytes was read */
    while ( len > 0 )
    {
        int c = yyinput();
        if (c == EOF) break;
        eventHandler.addBinData(static_cast<unsigned char>(c));
        num_chars++;
        len--;
    }
    eventHandler.rbrace();
}

\{[\r\n]*\\\*\\[A-Za-z]+(("+"|"-")?[0-9]*)?" "? { /* stared dest word */
    raise_dest(yytext);
    num_chars+=yyleng;
}
\{[\r\n]*\\[A-Za-z]+(("+"|"-")?[0-9]*)?" "? { /* dest word */
    /* NOTE(review): unlike the other rules this one does not add yyleng to num_chars - confirm whether that is intended */
    raise_destOrCtrl(yytext);
}
\\[A-Za-z]+(("+"|"-")?[0-9]*)?" "? { /* ctrl word */
    raise_ctrl(yytext);
    star_flag=0;
    num_chars+=yyleng;
}
\\\'[A-Fa-f0-9][A-Fa-f0-9] { /* hex char */
    eventHandler.addHexChar(yytext);
    num_chars+=yyleng;
}
\\\* { /* star: a backslash followed by an asterisk */
    star_flag=1;
    num_chars+=yyleng;
}
\\[\\{}] { /* escaped backslash or brace: emit the character after the backslash literally */
    eventHandler.addChar(yytext[1]);
    num_chars+=yyleng;
}
\{ { /* lbrace */
    eventHandler.lbrace();
    num_chars+=yyleng;
}
\} { /* rbrace */
    eventHandler.rbrace();
    num_chars+=yyleng;
}
\\\| { num_chars+=yyleng;}
\\~ {num_chars+=yyleng; eventHandler.addCharU(0xa0);}
\\- {num_chars+=yyleng;}
\\_ {num_chars+=yyleng;}
\\\: {num_chars+=yyleng;}
\n { ++num_lines;num_chars+=yyleng;}
\r {num_chars+=yyleng;}
\t {num_chars+=yyleng;}
" "(" "+) { eventHandler.addSpaces(yyleng); num_chars+=yyleng;}
. { eventHandler.addChar(yytext[0]); num_chars+=yyleng;}
%%