module hjson.parser;

public import std.json : JSONValue;

import std.algorithm;
import std.array;
import std.ascii : isDecimalDigit = isDigit, isHexDigit;
import std.conv : ConvException, to;
import std.exception : _enforce = enforce, basicExceptionCtors;
import std.format : format;
import std.range;
import std.typecons : Flag;
import std.uni;

import hjson.adapter;

/** Parses Hjson text and returns it as a `JSONValue`.

This is a convenience wrapper which feeds the parsed tokens
into a `StdJsonSerializer` writing to the returned value.
Params:
    hjson = string containing the Hjson.
Throws:
    `HjsonException` when passed invalid Hjson.
Returns:
    Parsed JSONValue.
*/
JSONValue parseHjson(string hjson)
{
    JSONValue parsed;
    scope serializer = StdJsonSerializer(&parsed);
    parseHjson(hjson, serializer);
    return parsed;
}

/** Parses Hjson text and reports every parsed token to `consumer`.

Use this overload to build representations other than std.json.JSONValue.
Parsing starts at column 0.
Params:
    hjson = string containing the Hjson.
    consumer = Object responsible for processing the parsed tokens.
Throws:
    `HjsonException` when passed invalid Hjson.
*/
void parseHjson(Consumer)(string hjson, ref Consumer consumer)
{
    // `parseValue` needs an lvalue it can keep up to date while it consumes input.
    size_t startCollumn = 0;
    parseValue(hjson, startCollumn, consumer);
}

/// Thrown by the parser when the input is not valid Hjson.
class HjsonException : Exception
{
    mixin basicExceptionCtors;
}

/** Parses a single Hjson value (object, array, string, number, bool or null).
Params:
    hjson = string being parsed. May contain leading whitespace.
            The beginning of the string until the end of the parsed
            Hjson value is consumed.
    collumn = How many `dchar`s were popped from the front of `hjson` since
              last line feed. Will be updated by the function.
              Needed to properly parse multiline strings.
    consumer = Object responsible for processing the parsed tokens.
Throws:
    `HjsonException` if `hjson` starts with an invalid Hjson value.
    Invalid Hjson past the first valid value is not detected.
*/
void parseValue(Consumer)(ref string hjson, ref size_t collumn, ref Consumer consumer)
{
    hjson.skipWC(collumn);
    enforce(!hjson.empty, "Expected a value before EOF.");

    // `tryParseNumber`, `parseQuotelessString` and `parseMultilineString` consume
    // input without advancing `collumn`. Remember where the value starts so that
    // the column can be recomputed from the text they consumed; otherwise a
    // multiline string later on the same line would be unindented incorrectly.
    string valueStart = hjson;

    // Brings `collumn` up to date with everything consumed since `valueStart`.
    void syncCollumn()
    {
        string consumed = valueStart[0 .. $ - hjson.length];
        // Number of `dchar`s after the last line feed (or in all of `consumed`
        // when it contains no line feed).
        immutable sinceLineFeed = consumed.retro.until!(c => c == '\n').walkLength;
        collumn = consumed.canFind('\n') ? sinceLineFeed : collumn + sinceLineFeed;
    }

    // Consumes the rest of the line as a quoteless string.
    void putQuotelessString()
    {
        consumer.putValue(hjson.parseQuotelessString());
        syncCollumn();
    }

    if(hjson.front == '{')
    {
        auto state = consumer.objectBegin();
        hjson.parseAggregate!('{', '}', parseObjectMember)(collumn, consumer);
        consumer.objectEnd(state);
    }
    else if(hjson.front == '[')
    {
        auto state = consumer.arrayBegin();
        hjson.parseAggregate!('[', ']', (ref hjs, ref col, ref ser){
            ser.elemBegin;
            hjs.parseValue(col,ser);
        })(collumn,consumer);
        consumer.arrayEnd(state);
    }
    // `tryParseBuiltin` advances `collumn` itself when it succeeds, so only the
    // quoteless fallback needs a sync in the three branches below.
    else if(hjson.save.startsWith("true"))
    {
        if(!hjson.tryParseBuiltin(collumn,true,"true",consumer))
            putQuotelessString();
    }
    else if(hjson.save.startsWith("false"))
    {
        if(!hjson.tryParseBuiltin(collumn,false,"false",consumer))
            putQuotelessString();
    }
    else if(hjson.save.startsWith("null"))
    {
        if(!hjson.tryParseBuiltin(collumn,null,"null",consumer))
            putQuotelessString();
    }
    else if(hjson.front == '"')
        consumer.putValue(hjson.parseJSONString(collumn));
    else if(hjson.front == '\'')
    {
        if(hjson.save.startsWith("'''"))
        {
            // The multiline parser receives the column of the opening ''' by value.
            consumer.putValue(hjson.parseMultilineString(collumn));
            syncCollumn();
        }
        else
            consumer.putValue(hjson.parseJSONString(collumn));
    }
    else if(!hjson.front.isPunctuator)
    {
        if(hjson.tryParseNumber(consumer))
            syncCollumn();
        else
            putQuotelessString();
    }
    else throw new HjsonException("Invalid Hjson.");
}

/** Parses a single Hjson object or array.
Params:
    hjson = string being parsed. Must not contain leading whitespace.
            The beginning of the string until the end of the parsed
            Hjson value is consumed.
    collumn = How many `dchar`s were popped from the front of `hjson` since
              last line feed. Will be updated by the function.
Needed to properly parse multiline strings.
    consumer = Object responsible for processing the parsed tokens.
    start = Token which marks the beginning of parsed aggregate ('[' for array, '{' for object)
    end = Token which marks the end of parsed aggregate (']' for array, '}' for object)
    parseMember = Function used to parse a single aggregate member. Parameters are
                  the same as `parseAggregate`. Hjson passed to `parseMember` contains
                  no leading whitespace.
Throws:
    `HjsonException` if `hjson` starts with an invalid Hjson value.
    Invalid Hjson past the first valid value is not detected.
*/
void parseAggregate
    (dchar start, dchar end, alias parseMember, Consumer)
    (ref string hjson, ref size_t collumn, ref Consumer consumer)
in(!hjson.empty)
in(hjson.front == start)
{
    // Get rid of the opening token and the whitespace/comments after it
    hjson.popFront();
    ++collumn;
    hjson.skipWC(collumn);

    // Handle an empty aggregate (whitespace/comments only between the tokens)
    enforce(!hjson.empty, "Expected member or '%s' before EOF.".format(end));
    if(hjson.front == end)
    {
        hjson.popFront();
        ++collumn;
        return;
    }

    // Now we know we have at least one member
    parseMember(hjson, collumn, consumer);

    while(true)
    {
        // Members are separated by a line feed, a comma, or both.
        // `skipWC` reports whether it skipped a line feed.
        bool gotMemberSeparator = hjson.skipWC(collumn);
        enforce(!hjson.empty, "Expected ',' or '%s' before EOF.".format(end));
        if(hjson.front == ',')
        {
            hjson.popFront();
            ++collumn;
            gotMemberSeparator = true;
            hjson.skipWC(collumn);
            enforce(!hjson.empty, "Expected member or '%s' before EOF.".format(end));
        }

        // The closing token may directly follow a separator (trailing comma)
        if(hjson.front == end)
        {
            hjson.popFront();
            ++collumn;
            return;
        }

        enforce(gotMemberSeparator, "Expected ',' or a line feed between members.");
        parseMember(hjson, collumn, consumer);
    }
}

/** In JSON you can determine the type of the parsed value by looking at just their
first character.
In Hjson a valid JSON number/bool/null followed by certain other
characters becomes a quoteless string instead. This function inspects the
characters following such a value and decides whether that is the case.

Params:
    sufix = Hjson following the previously parsed value.
Returns: Whether `sufix` turns the preceding Hjson number/bool/null into a quoteless string.
*/
bool turnsIntoQuotelessString(string sufix)
{
    // Tokens that end the value without making it a string.
    static bool endsValue(dchar c)
    {
        return c == ',' || c == ']' || c == '}';
    }

    if(sufix.empty || endsValue(sufix.front) || sufix.front == '\n')
        return false;

    // A comment-starting token NOT SEPARATED BY WHITESPACE is part of the string:
    // 1234#notcomment is a quoteless string
    // 1234 #comment is a number and a comment
    if(sufix.startsWith("//", "/*", "#"))
        return true;

    // Value followed by non-whitespace, non-comma char -> quoteless string
    if(!sufix.front.isWhite)
        return true;

    // Whitespace follows the value: it is a quoteless string only when another
    // non-terminating token appears before the end of the line.
    size_t ignoredCollumn;
    immutable crossedLineFeed = skipWC(sufix, ignoredCollumn);
    return !crossedLineFeed && !sufix.empty && !endsValue(sufix.front);
}

/** Attempts to parse a builtin constant.
Params:
    hjson = string being parsed. Must not contain leading whitespace.
            The beginning of the string until the end of the parsed
            Hjson value is consumed if and only if the constant was
            successfully parsed.
    collumn = How many `dchar`s were popped from the front of `hjson` since
              last line feed. Will be updated by the function.
              Needed to properly parse multiline strings.
    value = Value of the constant.
    repr = How the constant is represented in Hjson.
consumer = Object responsible for processing the parsed tokens.
Throws:
    `HjsonException` if `hjson` starts with an invalid Hjson value.
    Invalid Hjson past the first valid value is not detected.
Returns:
    `true` if parsing the constant succeeds, `false` if the value was actually a quoteless string.
*/
bool tryParseBuiltin(T,Consumer)(ref string hjson, ref size_t collumn, T value, string repr, ref Consumer consumer)
{
    // The caller has already checked that `hjson` starts with `repr`.
    string rest = hjson[repr.length .. $];

    // Leave `hjson` and `collumn` untouched when the text is really a quoteless string.
    if(turnsIntoQuotelessString(rest))
        return false;

    consumer.putValue(value);
    hjson = rest;
    collumn += repr.walkLength;
    return true;
}

/** Attempts to parse a Hjson number.
Params:
    hjson = string being parsed. Must not contain leading whitespace.
            The beginning of the string until the end of the parsed
            Hjson value is consumed if and only if the number was
            successfully parsed.
    consumer = Object responsible for processing the parsed tokens.
Throws:
    `HjsonException` if `hjson` starts with an invalid Hjson value.
    Invalid Hjson past the first valid value is not detected.
Returns:
    `true` if parsing the number succeeds, `false` if the value was actually a quoteless string.
*/
bool tryParseNumber(Consumer)(ref string hjson, ref Consumer consumer)
{
    // Index of the first character at or after `pos` which is not a decimal digit.
    static size_t pastDigits(string text, size_t pos)
    {
        while(pos < text.length && text[pos].isDecimalDigit)
            ++pos;
        return pos;
    }

    // Optional preceding -
    size_t end = (hjson.front == '-') ? 1 : 0;
    if(end >= hjson.length)
        return false;

    // Integer part: a single 0 or a run of digits
    if(hjson[end] == '0')
        ++end;
    else if(hjson[end].isDecimalDigit)
        end = pastDigits(hjson, end);
    else
        return false;

    bool isFloatingPoint = false;

    // Fractional part: '.' followed by at least one digit
    if(end < hjson.length && hjson[end] == '.')
    {
        ++end;
        if(end >= hjson.length || !hjson[end].isDecimalDigit)
            return false;
        end = pastDigits(hjson, end);
        isFloatingPoint = true;
    }

    // Exponent part: e/E, optional sign, at least one digit
    if(end < hjson.length && (hjson[end] == 'e' || hjson[end] == 'E'))
    {
        ++end;
        if(end < hjson.length && (hjson[end] == '+' || hjson[end] == '-'))
            ++end;
        if(end >= hjson.length || !hjson[end].isDecimalDigit)
            return false;
        end = pastDigits(hjson, end);
        isFloatingPoint = true;
    }

    // What looked like a number may still be the start of a quoteless string
    if(turnsIntoQuotelessString(hjson[end .. $]))
        return false;

    string literal = hjson[0 .. end];

    // Integers which do not fit into a long are reported as doubles instead
    if(!isFloatingPoint)
        try consumer.putValue(literal.to!long);
        catch(ConvException)
            isFloatingPoint = true;

    if(isFloatingPoint)
        consumer.putValue(literal.to!double);

    hjson.popFrontN(end);
    return true;
}

/** Parses a Hjson quoteless string.
Params:
    hjson = Hjson being parsed. Must not contain leading whitespace.
            The beginning of the Hjson until the end of the parsed
            Hjson value is consumed.
Throws:
    `HjsonException` if `hjson` starts with an invalid Hjson value.
    Invalid Hjson past the first valid value is not detected.
Returns: The parsed string.
*/
string parseQuotelessString(ref string hjson)
in(!hjson.empty)
{
    // A quoteless string runs until the end of the line; the line feed itself
    // is left in `hjson`.
    auto s = hjson.findSplitBefore("\n");
    hjson = s[1];
    auto result = s[0].stripRight!isWhite;
    assert(!result.empty);
    return result;
}

/** Parses a Hjson JSON-string, delimited by either `"` or `'`.

A `\uXXXX` escape which encodes a UTF-16 high surrogate must be directly
followed by a `\uXXXX` escape encoding a low surrogate; the pair is decoded
into a single code point.
Params:
    hjson = Hjson being parsed. Must not contain leading whitespace.
            The beginning of the Hjson until the end of the parsed
            Hjson value is consumed.
    collumn = How many `dchar`s were popped from the front of `hjson` since
              last line feed. Will be updated by the function.
              Needed to properly parse multiline strings.
Throws:
    `HjsonException` if the string is not terminated, or contains an invalid or
    incomplete escape sequence or an unpaired surrogate escape.
Returns: The parsed string.
*/
string parseJSONString(ref string hjson, ref size_t collumn)
in(!hjson.empty)
in(hjson.front == '"' || hjson.front == '\'')
{
    // Consumes exactly four hexadecimal digits and returns their value.
    // Works on code units so that a multi-byte character among the four
    // bytes is reported as an invalid escape rather than decoded.
    uint takeHexQuad()
    {
        enforce(hjson.length >= 4, "Incomplete Unicode escape sequence.");
        string digits = hjson[0 .. 4];
        foreach(char digit; digits)
            enforce(digit.isHexDigit, "Invalid Unicode escape sequence.");
        hjson = hjson[4 .. $];
        collumn += 4;
        return digits.to!uint(16);
    }

    immutable terminator = hjson.front;
    hjson.popFront();
    ++collumn;

    string result;

    while(!hjson.empty)
    {
        immutable c = hjson.front;
        hjson.popFront;
        ++collumn;

        if(c == '\n') collumn = 0;

        if(c == terminator)
        {
            return result;
        }
        else if(c == '\\')
        {
            enforce(!hjson.empty, "Incomplete escape sequence.");
            immutable d = hjson.front;
            hjson.popFront;
            ++collumn;
            switch(d)
            {
                case '"', '\'', '\\', '/': result ~= d; break;

                case 'b': result ~= '\b'; break;
                case 'f': result ~= '\f'; break;
                case 'n': result ~= '\n'; break;
                case 'r': result ~= '\r'; break;
                case 't': result ~= '\t'; break;

                case 'u':
                    uint codePoint = takeHexQuad();
                    if(codePoint >= 0xD800 && codePoint <= 0xDBFF)
                    {
                        // High surrogate: only valid as the first half of a pair
                        enforce(
                            hjson.length >= 2 && hjson[0 .. 2] == `\u`,
                            "Expected escaped low surrogate after escaped high surrogate."
                        );
                        hjson = hjson[2 .. $];
                        collumn += 2;
                        immutable low = takeHexQuad();
                        enforce(
                            low >= 0xDC00 && low <= 0xDFFF,
                            "Invalid escaped surrogate pair."
                        );
                        codePoint = 0x10000 + ((codePoint - 0xD800) << 10) + (low - 0xDC00);
                    }
                    else
                        enforce(
                            codePoint < 0xDC00 || codePoint > 0xDFFF,
                            "Unexpected escaped low surrogate."
                        );
                    result ~= cast(dchar) codePoint;
                    break;

                default: throw new HjsonException("Invalid escape sequence: \\%s".format(d));
            }
        }
        else result ~= c;
    }
    throw new HjsonException("Unterminated string literal.");
}

/** Parses a Hjson multiline string (delimited by `'''`).
Params:
    hjson = Hjson being parsed. Must not contain leading whitespace.
            The beginning of the Hjson until the end of the parsed
            Hjson value is consumed.
    collumn = How many `dchar`s were popped from the front of `hjson` since
              last line feed. Passed by value; it is not updated.
Throws:
    `HjsonException` if the closing `'''` is missing.
Returns: The parsed string.
*/
string parseMultilineString(ref string hjson, immutable size_t collumn)
in(!hjson.empty)
in(hjson.save.startsWith("'''"))
{
    hjson.popFrontN(3);
    auto s = hjson.findSplit("'''");
    enforce(s[1] == "'''", "Unterminated multiline string (missing ''').");
    hjson = s[2];
    auto str = s[0];

    //If line with opening ''' contains only whitespace, ignore that whitespace
    auto prefixWhitespace = str.save.until!(x => !x.isWhite);
    if(prefixWhitespace.canFind('\n'))
        str = str.find('\n')[1..$];

    //Unindent: on every line drop up to `collumn` leading whitespace characters
    string result;
    size_t ignoreWhitespace = collumn;
    foreach(x; str)
        if(x == '\n')
        {
            ignoreWhitespace = collumn;
            result ~= x;
        }
        else if(x.isWhite && ignoreWhitespace > 0)
            --ignoreWhitespace;
        else
        {
            ignoreWhitespace = 0;
            result ~= x;
        }

    // If sufix whitespace contains LF: remove it and all whitespace afterwards.
    // The cut position is searched for in code units, because the length of
    // `result` is measured in code units while whitespace may be multi-byte.
    if(result.retro.until!(x => !x.isWhite).canFind('\n'))
    {
        // A line feed is known to exist, and the last one in `result` is
        // necessarily the one inside the trailing whitespace.
        size_t lastLF = result.length - 1;
        while(result[lastLF] != '\n')
            --lastLF;
        result = result[0 .. lastLF];
    }

    return result;
}

/** Parses a single object member.
Params:
    hjson = Hjson being parsed. Must not contain leading whitespace.
            The beginning of the Hjson until the end of the parsed
            Hjson object member is consumed.
collumn = How many `dchar`s were popped from the front of `hjson` since
              last line feed. Will be updated by the function.
              Needed to properly parse multiline strings.
    consumer = Object responsible for processing the parsed tokens.
Throws:
    `HjsonException` if `hjson` starts with an invalid Hjson object member.
    Invalid Hjson past the first valid object member is not detected.
*/
void parseObjectMember(Consumer)(ref string hjson, ref size_t collumn, ref Consumer consumer)
{
    // Parse the key
    enforce(!hjson.empty, "Expected Hjson member before EOF.");
    enforce(!isPunctuator(hjson.front), "Expected Hjson member but got punctuator.");

    string key;
    if(hjson.front == '"' || hjson.front == '\'')
        key = hjson.parseJSONString(collumn);
    else
    {
        // A quoteless key ends at the first punctuator or whitespace character.
        // Scan decoded code points, not code units: UTF-8 continuation bytes
        // such as 0x85 or 0xA0 would otherwise be mistaken for whitespace and
        // cut the key in the middle of a character.
        string rest = hjson.find!(c => c.isPunctuator || c.isWhite);
        key = hjson[0 .. $ - rest.length];
        hjson = rest;
        // `collumn` counts `dchar`s, not code units
        collumn += key.walkLength;
    }

    // Get rid of ':'
    hjson.skipWC(collumn);
    enforce(!hjson.empty, "Expected ':' before EOF.");
    enforce(hjson.front == ':', "Expected ':'");
    hjson.popFront();
    ++collumn;

    // Parse the value
    hjson.skipWC(collumn);
    enforce(!hjson.empty, "Expected a value before EOF.");

    consumer.putKey(key);
    hjson.parseValue(collumn, consumer);
}

/** Consumes all whitespace and comments from the front of the passed Hjson.
Params:
    hjson = Hjson from which whitespace and comments should be consumed.
    collumn = How many `dchar`s were popped from the front of `hjson` since
              last line feed. Will be updated by the function.
              Needed to properly parse multiline strings.
Throws:
    HjsonException if a block comment is not terminated before the end of the string.
Returns:
    `true` if a line feed was skipped, `false` otherwise. This is needed because
    line feeds can be used to separate aggregate members similar to commas.
*/
bool skipWC(ref string hjson, ref size_t collumn)
{
    bool skippedLF = false;

    while(!hjson.empty)
    {
        // Stays true only when this pass consumed neither whitespace nor a comment
        bool finished = true;

        //Whitespace
        while(!hjson.empty && hjson.front.isWhite)
        {
            if(hjson.front == '\n')
            {
                skippedLF = true;
                collumn = 0;
            }
            else ++collumn;
            hjson.popFront;
            finished = false;
        }
        //Comments
        if(!hjson.empty)
        {
            if(hjson.front == '#' || hjson.save.startsWith("//"))
            {
                // Line comment: drop everything before the next line feed.
                // The line feed itself is consumed as whitespace by the next pass.
                hjson = hjson.find('\n');
                collumn = 0;
                finished = false;
            }
            else if(hjson.save.startsWith("/*"))
            {
                // The opening token counts towards the column, just like the closing one
                hjson.popFrontN(2);
                collumn += 2;
                while(!hjson.save.startsWith("*/"))
                {
                    enforce(!hjson.empty, "Unterminated block comment (missing */)");
                    if(hjson.front == '\n') collumn = 0;
                    else ++collumn;
                    hjson.popFront;
                }
                hjson.popFrontN(2);
                collumn += 2;
                finished = false;
            }
        }
        if(finished) break;
    }
    return skippedLF;
}

@("skipWC") unittest
{
    size_t collumn = 0;
    string text = " \t \r ";
    assert(!skipWC(text, collumn));
    assert(text.empty);

    collumn = 0;
    text = " \t hello";
    assert(!skipWC(text, collumn));
    assert(text == "hello");
    assert(collumn == 3);

    collumn = 0;
    text = " \n ";
    assert(skipWC(text, collumn));
    assert(text.empty);
    assert(collumn == 1);

    // Both tokens of a block comment count towards the column
    collumn = 0;
    text = "/* c */x";
    assert(!skipWC(text, collumn));
    assert(text == "x");
    assert(collumn == 7);
}

alias enforce = _enforce!HjsonException;

/** Checks whether given `dchar` is a Hjson punctuator.
Hjson quoteless strings may not start with a punctuator,
and quoteless object keys may not contain any punctuators.
*/
bool isPunctuator(dchar c)
{
    // The punctuators are: { } [ ] , :
    switch(c)
    {
        case '{', '}', '[', ']', ',', ':':
            return true;
        default:
            return false;
    }
}

version(unittest):
version(Have_unit_threaded):

import std.format : format;
import std.json : parseJSON;
import std.range : chain, only, iota;

import unit_threaded;

// Test cases with a test Hjson file, a reference Hjson file and a reference JSON file.
// All three must parse to the same value as std.json produces for the JSON file.
static foreach(testName; [
    "charset",
    "charset2",
    "comments",
    "empty",
    "kan",
    "keys",
    "oa",
    "passSingle",
    "stringify1",
    "strings",
    "strings2",
    "trail"
]) {
    @testName unittest
    {
        immutable json = import(testName~"_result.json"),
            hjsonResult = import(testName~"_result.hjson"),
            hjsonTest = import(testName~"_test.hjson");

        hjsonTest.parseHjson.should == json.parseJSON;
        hjsonResult.parseHjson.should == json.parseJSON;
        json.parseHjson.should == json.parseJSON;
    }
}

// Test cases which only have a reference Hjson file and a reference JSON file.
static foreach(testName; [
    "mltabs",
    "pass1",
    "pass2",
    "pass3",
    "pass4"
]) {
    @testName unittest
    {
        immutable hjson = import(testName~"_result.hjson"),
            json = import(testName~"_result.json");

        hjson.parseHjson.should == json.parseJSON;
        json.parseHjson.should == json.parseJSON;
    }
}

// Invalid inputs which the parser is expected to reject.
static foreach(failNr; chain(
    only(2),
    iota(5,7),
    iota(11,18),
    iota(19,24),
    only(26),
    iota(28,34)
)) {
    @Tags("invalid_input")
    @format("failJSON%d", failNr) unittest
    {
        immutable json = import("failJSON%02d_test.json".format(failNr));
        json.parseHjson.shouldThrow!HjsonException;
    }
}

// Invalid inputs which are currently accepted, see the reason given to `ShouldFail`.
static foreach(failNr; [7,8,10,34])
{
    @Tags("invalid_input")
    @ShouldFail("Hjson-d does not attempt to validate the rest of input after parsing a valid Hjson value.")
    @format("failJSON%d", failNr) unittest
    {
        immutable json = import("failJSON%02d_test.json".format(failNr));
        json.parseHjson.shouldThrow!HjsonException;
    }
}