/*
 * This file is part of gir-to-d.
 *
 * gir-to-d is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * gir-to-d is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with gir-to-d.  If not, see <http://www.gnu.org/licenses/>.
 */

module gtd.XMLReader;

import std.algorithm;
import std.array;
import std.conv : to;
import std.exception;
import std.range;
import std.string;
import std.traits: isSomeChar;
import std.uni;

import gtd.WrapException;

/**
 * A single node produced by XMLReader.
 */
struct XMLNode
{
	/// The kind of node.
	XMLNodeType type;

	/// Tag name for tags, textual content for the other node types.
	string value;
	/// Attributes of a start or empty tag, indexed by attribute name.
	string[string] attributes;
}

/**
 * The kinds of node XMLReader can report.
 */
enum XMLNodeType
{
	None,
	PI,
	StartTag,
	Text,
	CData,
	DocType,
	Comment,
	EmptyTag,
	EndTag,
	DocumentEnd
}

/**
 * A small pull parser: `front` holds the current node and `popFront`
 * advances to the next one until a `DocumentEnd` node is reached.
 */
class XMLReader(T)
	if (isInputRange!T && isSomeChar!(ElementType!T) )
{
	/// The node the reader is currently positioned on.
	XMLNode front;
	/// File name used in diagnostic messages.
	string fileName;

	static if ( is( T == string ) )
		private CountLines!ByChar document;
	else
		private CountLines!T document;

	/**
	 * Params:
	 *     document = The XML document to parse.
	 *     fileName = File name to print in diagnostic messages.
	 */
	this(T document, string fileName = null)
	{
		static if ( is( T == string ) )
			this.document = CountLines!ByChar(ByChar(document));
		else
			this.document = CountLines!T(document);

		this.fileName = fileName;

		// Position the reader on the first node.
		popFront();
	}

	/// The line of the document the parser has reached (1 based).
	@property size_t line()
	{
		return document.line;
	}
	alias lineNumber = line;

	/**
	 * Parse the next node into `front`.
	 * Once the input is exhausted `front` becomes a `DocumentEnd` node.
	 */
	void popFront()
	{
		front = XMLNode();

		if ( document.empty )
		{
			front.type = XMLNodeType.DocumentEnd;
			return;
		}

		if ( document.front == '<' )
			parseTag();
		else
			parseText();
	}

	/// True once the input is consumed and the `DocumentEnd` node was produced.
	@property bool empty()
	{
		return document.empty && front.type == XMLNodeType.DocumentEnd;
	}

	/**
	 * Parse whatever starts with '<' and skip the whitespace following it.
	 *
	 * Throws: XMLException for an unknown or truncated `<!` construct,
	 *         Exception (from enforce) when a `<![CDATA[`, `<!DOCTYPE`
	 *         or `<!--` introducer is malformed.
	 */
	private void parseTag()
	{
		document.popFront(); // consume '<'

		if ( document.empty )
			throw new XMLException(this, "Invalid XML tag");

		switch ( document.front )
		{
			case '!':
				document.popFront();

				if ( document.empty )
					throw new XMLException(this, "Invalid XML tag");

				switch ( document.front )
				{
					case '[':
						enforce(document.skipOver("[CDATA["));
						parseCDATA();
						break;
					case 'D':
						// The '!' has already been consumed above.
						enforce(document.skipOver("DOCTYPE"));
						parseDocType();
						break;
					case '-':
						enforce(document.skipOver("--"));
						parseComment();
						break;
					default:
						throw new XMLException(this, "Invalid XML tag");
				}
				break;
			case '?':
				document.popFront();
				parsePI();
				break;
			case '/':
				document.popFront();
				// An end tag is parsed like a start tag, only the type differs.
				parseStartTag();
				front.type = XMLNodeType.EndTag;
				break;
			default:
				parseStartTag();
				break;
		}

		skipWhitespace();
	}

	/**
	 * Parse the content of a CDATA section up to and including "]]>".
	 * An unterminated section yields everything up to the end of the input.
	 */
	private void parseCDATA()
	{
		front.type = XMLNodeType.CData;
		auto buff = appender!string();

		// Number of consecutive ']' read that are not yet part of the value,
		// the last two of them may turn out to belong to the terminator.
		size_t pending;
		bool closed;

		while ( !closed && !document.empty )
		{
			auto c = document.front;
			document.popFront();

			if ( c == ']' )
			{
				pending++;
				continue;
			}

			if ( c == '>' && pending >= 2 )
			{
				pending -= 2;
				closed = true;
			}

			foreach ( i; 0 .. pending )
				buff.put(']');
			pending = 0;

			if ( !closed )
				buff.put(c);
		}

		// Unterminated section: the brackets seen last are plain content.
		foreach ( i; 0 .. pending )
			buff.put(']');

		front.value = buff.data;
	}

	/**
	 * Parse a document type declaration, the "<!DOCTYPE" is already consumed.
	 * A '>' only ends the declaration outside of an internal subset ("[...]").
	 */
	private void parseDocType()
	{
		front.type = XMLNodeType.DocType;
		auto buff = appender!string();
		int bracketCount;

		skipWhitespace();

		while ( !document.empty )
		{
			auto c = document.front;

			if ( c == '>' && bracketCount == 0 )
			{
				document.popFront();
				break;
			}

			if ( c == '[' )
				bracketCount++;
			else if ( c == ']' )
				bracketCount--;

			buff.put(c);
			document.popFront();
		}

		front.value = buff.data.stripRight();
	}

	/**
	 * Parse a comment, the "<!--" is already consumed.
	 *
	 * Throws: XMLException when "--" is not directly followed by '>'.
	 */
	private void parseComment()
	{
		front.type = XMLNodeType.Comment;
		auto buff = appender!string();

		while ( !document.empty )
		{
			auto c = document.front;
			document.popFront();

			if ( c != '-' || document.empty || document.front != '-' )
			{
				buff.put(c);
				continue;
			}

			// Found "--", only valid as the start of the "-->" terminator.
			document.popFront();

			if ( document.empty || document.front != '>' )
				throw new XMLException(this, "-- not allowed in comments.");

			document.popFront();
			break;
		}

		front.value = buff.data.strip();
	}

	/**
	 * Parse a processing instruction up to and including "?>",
	 * the "<?" is already consumed.
	 */
	private void parsePI()
	{
		front.type = XMLNodeType.PI;
		auto buff = appender!string();

		while ( !document.empty )
		{
			auto c = document.front;
			document.popFront();

			if ( c == '?' && !document.empty && document.front == '>' )
			{
				document.popFront();
				break;
			}

			buff.put(c);
		}

		front.value = buff.data.stripRight();
	}

	/**
	 * Parse the tag name and the attributes of a start, empty or end tag.
	 * A tag cut short by the end of the input is accepted as far as it was read.
	 *
	 * Throws: XMLException on a malformed attribute.
	 */
	private void parseStartTag()
	{
		front.type = XMLNodeType.StartTag;
		auto buff = appender!string();

		while ( !document.empty && !(document.front.isWhite() || document.front == '/' || document.front == '>') )
		{
			buff.put(document.front);
			document.popFront();
		}

		front.value = buff.data;

		while ( true )
		{
			skipWhitespace();

			if ( document.empty )
				return;

			if ( document.front == '/' )
			{
				front.type = XMLNodeType.EmptyTag;
				document.popFront();

				if ( document.empty )
					return;
			}

			if ( document.front == '>' )
			{
				document.popFront();
				return;
			}

			parseAttribute();
		}
	}

	/**
	 * Parse one `name = "value"` pair and store it in `front.attributes`.
	 * Always consumes input or throws, so the caller's loop makes progress.
	 *
	 * Throws: XMLException when the value is missing or not quoted.
	 */
	private void parseAttribute()
	{
		auto name = appender!string();

		while ( !document.empty && !(document.front.isWhite() || document.front == '='
			|| document.front == '/' || document.front == '>') )
		{
			name.put(document.front);
			document.popFront();
		}

		skipWhitespace();

		if ( !document.empty && document.front == '=' )
			document.popFront();

		skipWhitespace();

		if ( document.empty )
			return;

		auto quote = document.front;

		if ( quote != '"' && quote != '\'' )
			throw new XMLException(this, "Attribute value must be quoted");

		document.popFront();

		auto value = appender!string();

		while ( !document.empty )
		{
			auto c = document.front;

			if ( c == quote )
			{
				document.popFront();
				break;
			}

			if ( c == '&' )
				parseAmpersand(value); // leaves the ';' as the front element.
			else
				value.put(c);

			document.popFront();
		}

		front.attributes[name.data] = value.data;
	}

	/**
	 * Parse character data up to the next '<' or the end of the input.
	 * Trailing whitespace is stripped from the value.
	 */
	private void parseText()
	{
		front.type = XMLNodeType.Text;
		auto buff = appender!string();

		while ( !document.empty && document.front != '<' )
		{
			if ( document.front == '&' )
				parseAmpersand(buff); // leaves the ';' as the front element.
			else
				buff.put(document.front);

			document.popFront();
		}

		front.value = buff.data.stripRight();
	}

	/// Advance past any whitespace at the current position.
	private void skipWhitespace()
	{
		while ( !document.empty && isWhite(document.front) )
			document.popFront();
	}

	/**
	 * Decode the entity or character reference at the current position,
	 * which must be on the '&', and append the character to buff.
	 * On return the terminating ';' is still the front of the document,
	 * the caller is expected to pop it.
	 *
	 * Supported are the five predefined entities and decimal ("&#60;")
	 * or hexadecimal ("&#x3C;") character references.
	 *
	 * Params:
	 *     buff = Receives the decoded character. Taken by reference so the
	 *            character always ends up in the caller's buffer.
	 *
	 * Throws: XMLException for unknown, overlong or unterminated references.
	 */
	private void parseAmpersand(ref Appender!(string) buff)
	{
		enum message = "Unregonized escape secuence";

		// Large enough for the longest reference, "#x10FFFF", with room to spare.
		char[16] sequence;
		size_t length;

		document.popFront(); // consume '&'

		while ( !document.empty && document.front != ';' )
		{
			// Reference names are plain ASCII.
			if ( length == sequence.length || document.front > 0x7F )
				throw new XMLException(this, message);

			sequence[length++] = cast(char) document.front;
			document.popFront();
		}

		if ( document.empty )
			throw new XMLException(this, message);

		const(char)[] name = sequence[0 .. length];

		switch ( name )
		{
			case "quot":
				buff.put('"');
				break;
			case "amp":
				buff.put('&');
				break;
			case "apos":
				buff.put('\'');
				break;
			case "lt":
				buff.put('<');
				break;
			case "gt":
				buff.put('>');
				break;
			default:
				dchar decoded;

				if ( !decodeCharRef(name, decoded) )
					throw new XMLException(this, message);

				buff.put(decoded);
				break;
		}
	}

	/**
	 * Decode the body of a numeric character reference: "#" followed by
	 * decimal digits, or "#x" followed by hexadecimal digits.
	 *
	 * Params:
	 *     reference = The text between '&' and ';'.
	 *     result    = Receives the decoded code point.
	 *
	 * Returns: false when reference is not a numeric reference or does
	 *          not denote a usable code point (zero, a surrogate, or
	 *          beyond U+10FFFF).
	 */
	private static bool decodeCharRef(const(char)[] reference, out dchar result)
	{
		if ( reference.length < 2 || reference[0] != '#' )
			return false;

		uint radix = 10;
		auto numerals = reference[1 .. $];

		if ( numerals[0] == 'x' )
		{
			radix = 16;
			numerals = numerals[1 .. $];
		}

		if ( numerals.length == 0 )
			return false;

		uint code;

		foreach ( char c; numerals )
		{
			uint digit;

			if ( c >= '0' && c <= '9' )
				digit = c - '0';
			else if ( radix == 16 && c >= 'a' && c <= 'f' )
				digit = c - 'a' + 10;
			else if ( radix == 16 && c >= 'A' && c <= 'F' )
				digit = c - 'A' + 10;
			else
				return false;

			code = code * radix + digit;

			// Checked on every digit, this also keeps code from overflowing.
			if ( code > 0x10FFFF )
				return false;
		}

		if ( code == 0 || (code >= 0xD800 && code <= 0xDFFF) )
			return false;

		result = cast(dchar) code;
		return true;
	}
}

/**
 * Skip the current tag and its content.
 * Leaves the reader pointing to the end tag with the same depth.
 *
 * An empty tag is left untouched, any other node that is not a
 * start tag is simply stepped over.
 */
void skipTag(T)(XMLReader!T reader)
{
	if ( reader.front.type == XMLNodeType.EmptyTag )
		return;
	if ( reader.front.type != XMLNodeType.StartTag )
	{
		reader.popFront();
		return;
	}

	string tagName = reader.front.value;
	size_t depth;

	while ( !reader.empty )
	{
		if ( reader.front.type == XMLNodeType.StartTag )
			depth++;

		if ( reader.front.type == XMLNodeType.EndTag )
			depth--;

		// Back at the level we started on: this is the matching end tag.
		if ( depth == 0 && reader.front.value == tagName )
			return;

		reader.popFront();
	}
}

/**
 * Is this an end tag with name tagName.
 */
bool endTag(T)(XMLReader!T reader, string tagName)
{
	return reader.front.type == XMLNodeType.EndTag && reader.front.value == tagName;
}

/// ditto.
bool endTag(T)(XMLReader!T reader, string[] tagNames ...)
{
	return reader.front.type == XMLNodeType.EndTag && tagNames.canFind(reader.front.value);
}

/**
 * Exception carrying the file name and line the reader had reached
 * when the problem was detected.
 */
class XMLException : WrapException
{
	this (T)(XMLReader!T reader, string msg)
	{
		super(msg, reader.fileName, reader.line, null);
	}

	override string toString()
	{
		string s;
		toString((buf) { s ~= buf; });
		return s;
	}

	/// Writes "file(line)" followed by ": msg" when there is a message.
	override void toString(scope void delegate(in char[]) sink) const
	{
		sink(file);
		sink("("); sink(to!string(line)); sink(")");

		if (msg.length)
		{
			sink(": "); sink(msg);
		}
	}

}

/**
 * Range over the code units of a string, without decoding them.
 */
struct ByChar
{
	string data;

	@property char front()
	{
		return data[0];
	}

	@property bool empty()
	{
		return !data.length;
	}

	void popFront()
	{
		assert(data.length, "Attempting to popFront() past the end of an array");
		data = data[1 .. $];
	}

	@property ByChar save()
	{
		return this;
	}

	alias data this;
}

/**
 * Wraps a character range and keeps track of the current line number.
 */
struct CountLines(Source) if (isSomeChar!(ElementType!Source))
{
	Source src;
	/// Line of the current front element (1 based).
	size_t line = 1;

	this(Source src)
	{
		this.src = src;
	}

	@property ElementType!Source front()
	{
		return src.front;
	}

	@property bool empty()
	{
		return src.empty;
	}

	void popFront()
	{
		// Count the newline while it is consumed, so the element following
		// it is never inspected; there may not be one.
		if ( src.front == '\n' )
			line++;

		src.popFront();
	}

	@property typeof(this) save()
	{
		auto copy = typeof(this)(src.save);
		// Algorithms like skipOver work on a saved copy and assign it back,
		// the copy must not restart counting at line 1.
		copy.line = line;
		return copy;
	}
}

unittest
{
	auto reader = new XMLReader!string("<test>");
	assert(reader.front.type == XMLNodeType.StartTag);
	assert(reader.front.value == "test");
	reader.popFront();
	assert(reader.front.type == XMLNodeType.DocumentEnd);
	assert(reader.empty);

	reader = new XMLReader!string(
		"<?xml version=\"1.0\"?>\n<!DOCTYPE root>\n<root a=\"1\" b='x &amp; y'>\n"
		~ "<!-- note -->\n<![CDATA[a]]b]]>text &#x41;<leaf/></root>");

	assert(reader.front.type == XMLNodeType.PI);
	assert(reader.front.value == "xml version=\"1.0\"");
	assert(reader.line == 2);

	reader.popFront();
	assert(reader.front.type == XMLNodeType.DocType);
	assert(reader.front.value == "root");
	assert(reader.line == 3);

	reader.popFront();
	assert(reader.front.type == XMLNodeType.StartTag);
	assert(reader.front.value == "root");
	assert(reader.front.attributes["a"] == "1");
	assert(reader.front.attributes["b"] == "x & y");

	reader.popFront();
	assert(reader.front.type == XMLNodeType.Comment);
	assert(reader.front.value == "note");

	reader.popFront();
	assert(reader.front.type == XMLNodeType.CData);
	assert(reader.front.value == "a]]b");

	reader.popFront();
	assert(reader.front.type == XMLNodeType.Text);
	assert(reader.front.value == "text A");

	reader.popFront();
	assert(reader.front.type == XMLNodeType.EmptyTag);
	assert(reader.front.value == "leaf");

	reader.popFront();
	assert(reader.endTag("root"));

	reader.popFront();
	assert(reader.empty);

	reader = new XMLReader!string("<a><b><a></a></b></a><c/>");
	reader.skipTag();
	assert(reader.endTag("a"));
	reader.popFront();
	assert(reader.front.type == XMLNodeType.EmptyTag);
	assert(reader.front.value == "c");

	assertThrown!XMLException(new XMLReader!string("<a b=\"&bogus;\">"));
	assertThrown!XMLException(new XMLReader!string("<a b>"));
}