module gherkin.parser;

import std.algorithm.searching : startsWith;
import std.array : array, empty, join;
import std.algorithm : map;
import std.conv : to;
import std.range : back, popBack, repeat, walkLength;
import std.regex : ctRegex, replace, split;
import std.string : chomp, replace, split, strip, stripLeft;
import std.stdio : File;
import std.typecons : Nullable;

import gherkin;

/// Classification of a single source line as produced by the line tokenizer.
enum Token
{
    Language,
    Feature,
    Scenario,
    Background,
    Step,
    Examples,
    DocString,
    Other,
    TableRow,
    Comment,
    Tag,
    EmptyLine,
    Null
}

/// A classified source line.
struct LineToken
{
    /// Kind of line.
    Token token;
    /// The keyword prefix that matched (empty for `Other` and `EmptyLine`).
    string keyword;
    /// Everything on the line after the keyword (empty when nothing matched).
    string text;
    /// 1-based position of the line; comments are always reported at column 1.
    Location location;
}

/// Line-oriented recursive-descent parser for Gherkin feature files.
class Parser
{
    /**
     * Parses a Gherkin document that has already been split into lines.
     *
     * Params:
     *   documentStrings = the document, one element per line, without line terminators
     *   uri = identifier of the document, stored in the result and used in error messages
     *
     * Returns: the parsed document.
     *
     * Throws: `Exception` when a doc string is opened but never closed.
     */
    static GherkinDocument parse(string[] documentStrings, string uri)
    {
        // Parser state shared by all nested parse functions below.
        ulong lineNumber; // 0-based index of the line currently being processed
        ulong id; // running counter used to hand out ids
        Tag[] tags; // tags collected so far, waiting for the element they belong to
        auto document = GherkinDocument(uri, documentStrings);

        // Keyword prefix -> token kind.
        static struct KeywordPrefix
        {
            string keyword;
            Token token;
        }

        // The table is scanned in order and the first matching prefix wins, so a
        // keyword that is a prefix of another one must come AFTER the longer one
        // ("#language:" before "#"). An associative array cannot be used here
        // because its iteration order is unspecified.
        static immutable KeywordPrefix[] keywordPrefixes = [
            KeywordPrefix("#language:", Token.Language),
            KeywordPrefix("Feature:", Token.Feature),
            KeywordPrefix("Scenario:", Token.Scenario),
            KeywordPrefix("Example:", Token.Scenario),
            KeywordPrefix("Scenario Outline:", Token.Scenario),
            KeywordPrefix("Background:", Token.Background),
            KeywordPrefix("Given ", Token.Step),
            KeywordPrefix("When ", Token.Step),
            KeywordPrefix("Then ", Token.Step),
            KeywordPrefix("And ", Token.Step),
            KeywordPrefix("But ", Token.Step),
            KeywordPrefix("* ", Token.Step),
            KeywordPrefix("@", Token.Tag),
            KeywordPrefix("Examples:", Token.Examples),
            KeywordPrefix("#", Token.Comment),
            KeywordPrefix(`"""`, Token.DocString),
            KeywordPrefix("```", Token.DocString),
            KeywordPrefix("|", Token.TableRow)
        ];

        // Classifies one line. `lineIndex` is the 0-based index of `line`.
        LineToken getToken(string line, ulong lineIndex)
        {
            auto strippedLine = line.stripLeft;
            auto indent = line.length - strippedLine.length;
            auto token = Token.Other;
            auto location = Location(indent + 1, lineIndex + 1);
            string text;
            string keyword;

            if (strippedLine.empty)
            {
                token = Token.EmptyLine;
            }
            else
            {
                foreach (entry; keywordPrefixes)
                {
                    if (strippedLine.startsWith(entry.keyword))
                    {
                        token = entry.token;
                        keyword = entry.keyword;
                        // Slice by code units (.length); slicing and `indent`
                        // are both expressed in code units, not code points.
                        text = strippedLine[keyword.length .. $];
                        if (token == Token.Comment)
                        {
                            location.column = 1;
                        }
                        break;
                    }
                }
            }

            return LineToken(token, keyword, text, location);
        }

        // Splits the current line on single spaces and appends every non-empty
        // piece to the pending `tags`, tracking the column of each piece.
        void parseTag(LineToken token)
        {
            immutable auto line = documentStrings[lineNumber];
            immutable auto strippedLine = line.strip;
            immutable auto tagStrings = strippedLine.split(" ");

            auto column = token.location.column;
            foreach (tagString; tagStrings)
            {
                if (!tagString.empty)
                {
                    tags ~= Tag(tagString, Location(column, lineNumber + 1));
                }
                // Advance past the piece and the separating space; empty pieces
                // (runs of spaces) still advance the column by one.
                column += tagString.walkLength + 1;
            }
        }

        // Parses a doc string whose opening delimiter is the current line.
        // On return `lineNumber` points at the closing delimiter.
        // Every line between the delimiters is content, including lines that
        // would be comments, tags or keywords elsewhere in the document.
        DocString parseDocString(LineToken token)
        {
            string[] content;
            auto indent = token.location.column - 1;
            auto indentSpaces = ' '.repeat(token.location.column - 1);
            auto separator = token.keyword;
            auto contentType = token.text;

            while (++lineNumber < documentStrings.length)
            {
                auto line = documentStrings[lineNumber];

                // Only a line consisting of exactly the opening delimiter closes
                // the doc string; the other delimiter kind is ordinary content.
                if (line.stripLeft == separator)
                {
                    return DocString(content.join("\n"), contentType,
                            separator, token.location);
                }

                // Remove the delimiter's indentation when the line has at least
                // that much; otherwise drop whatever leading whitespace exists.
                if (line.startsWith(indentSpaces))
                {
                    content ~= line[indent .. $].replace("\\\"", `"`);
                }
                else
                {
                    content ~= line.stripLeft.replace("\\\"", `"`);
                }
            }

            // Malformed input, not a programming error: report it to the caller
            // instead of aborting the process.
            throw new Exception(
                    "Unexpected end of file: doc string opened with " ~ separator
                    ~ " is never closed in " ~ uri);
        }

        // Parses consecutive table rows starting at the current line. Empty
        // lines are skipped and comment lines are recorded on the document.
        // Stops at the first other line and leaves `lineNumber` on the last
        // consumed line, so the caller's pre-increment lands on that other line.
        TableRow[] parseTableRows()
        {
            TableRow[] tableRows;
            while (lineNumber < documentStrings.length)
            {
                auto line = documentStrings[lineNumber];
                auto lineToken = getToken(line, lineNumber);
                switch (lineToken.token)
                {
                case Token.TableRow:
                    // Drop the trailing '|' and split on unescaped '|'.
                    // cellStrings[0] is whatever precedes the first '|'.
                    const auto cellStrings = line.replace(ctRegex!(`\|\s*$`),
                            ``).split(ctRegex!(`(?<!\\)\|`));
                    auto column = cellStrings[0].walkLength + 1;
                    auto row = TableRow((id++).to!string, [], Location(column, lineNumber + 1));
                    foreach (cellString; cellStrings[1 .. $])
                    {
                        string value;
                        string strippedCellString = cellString.strip;
                        ulong i;
                        // Unescape the cell: \n -> newline, \| -> |, \\ -> \ ;
                        // any other backslash sequence is kept verbatim.
                        while (i < strippedCellString.length)
                        {
                            auto c = strippedCellString[i].to!string;
                            i++;
                            if (c == `\` && i < strippedCellString.length)
                            {
                                c = strippedCellString[i].to!string;
                                i++;
                                if (c == `n`)
                                {
                                    c = "\n";
                                }
                                else if (c != `|` && c != `\`)
                                {
                                    value ~= "\\";
                                }
                            }
                            value ~= c;
                        }
                        // Cell column = column of the preceding '|' + leading
                        // whitespace inside the cell + 1.
                        row.cells ~= Cell(value, Location(column + (cellString.walkLength - cellString.stripLeft()
                                .walkLength) + 1, lineNumber + 1));
                        column += cellString.walkLength + 1;
                    }
                    tableRows ~= row;
                    break;
                case Token.EmptyLine:
                    break;
                case Token.Comment:
                    document.comments ~= Comment(line, lineToken.location);
                    break;
                default:
                    lineNumber--;
                    return tableRows;
                }

                lineNumber++;
            }
            return tableRows;
        }

        // Parses a step starting at the current line, together with an attached
        // doc string and/or data table. The step id is assigned after its
        // arguments have been parsed.
        Step parseStep(LineToken token, Scenario parent)
        {
            auto line = documentStrings[lineNumber];
            Step step = Step(token.keyword, token.text, token.location, parent);

            while (++lineNumber < documentStrings.length)
            {
                line = documentStrings[lineNumber];
                auto lineToken = getToken(line, lineNumber);
                switch (lineToken.token)
                {
                case Token.DocString:
                    step.docString = parseDocString(lineToken);
                    break;
                case Token.TableRow:
                    step.dataTable = DataTable(parseTableRows(),
                            lineToken.location);
                    break;
                case Token.Comment:
                    document.comments ~= Comment(line, lineToken.location);
                    break;
                case Token.EmptyLine:
                    break;
                default:
                    // Not part of this step: un-read the line for the caller.
                    lineNumber--;
                    step.id = (id++).to!string;
                    return step;
                }
            }
            step.id = (id++).to!string;
            return step;
        }

        // Collects a free-text description starting at the current line.
        // Empty and `Other` lines are appended, comments are recorded on the
        // document, and trailing empty lines are removed from the result.
        string parseDescription()
        {
            auto line = documentStrings[lineNumber];
            string[] descriptions = [line];

            // Drops zero-length lines from the end of the description.
            string[] stripTail(string[] descriptions)
            {
                while (!descriptions.empty)
                {
                    if (descriptions.back.length > 0)
                    {
                        break;
                    }
                    descriptions.popBack;
                }
                return descriptions;
            }

            while (++lineNumber < documentStrings.length)
            {
                line = documentStrings[lineNumber];
                auto lineToken = getToken(line, lineNumber);
                switch (lineToken.token)
                {
                case Token.Comment:
                    document.comments ~= Comment(line, lineToken.location);
                    break;
                case Token.EmptyLine:
                case Token.Other:
                    descriptions ~= line.replace("\\\\", `\`);
                    break;
                default:
                    // Not part of the description: un-read the line.
                    lineNumber--;
                    return stripTail(descriptions).join("\n");
                }
            }

            return stripTail(descriptions).join("\n");
        }

        // Parses an `Examples:` section starting at the current line. Takes
        // ownership of the pending tags. The first table row becomes the header,
        // the remaining rows the body.
        Examples parseExamples(LineToken token)
        {
            auto line = documentStrings[lineNumber];
            TableRow[] tableRows;
            Nullable!string description;
            Tag[] examplesTags;
            if (!tags.empty)
            {
                examplesTags = tags;
                tags = [];
            }

            // Builds the result from what has been collected so far; tag ids
            // are handed out here, after the table rows got theirs.
            Examples finalize()
            {
                Nullable!TableRow tableHeader;
                TableRow[] tableBody;
                if (!tableRows.empty)
                {
                    tableHeader = tableRows[0];
                    if (tableRows.length > 1)
                    {
                        tableBody = tableRows[1 .. $];
                    }
                }
                // keyword[0 .. $ - 1] drops the trailing ':' of the keyword.
                auto examples = Examples(token.keyword[0 .. $ - 1],
                        token.text.stripLeft, token.location, tableHeader, tableBody);
                foreach (i, tag; examplesTags)
                {
                    tag.id = (id++).to!string;
                    examples.tags ~= tag;
                }
                if (!description.isNull)
                {
                    examples.description = description;
                }

                return examples;
            }

            while (++lineNumber < documentStrings.length)
            {
                line = documentStrings[lineNumber];
                auto lineToken = getToken(line, lineNumber);
                switch (lineToken.token)
                {
                case Token.TableRow:
                    tableRows = parseTableRows();
                    break;
                case Token.Comment:
                    document.comments ~= Comment(line, lineToken.location);
                    break;
                case Token.Other:
                    description = parseDescription();
                    break;
                case Token.EmptyLine:
                    break;
                default:
                    // Not part of this section: un-read the line.
                    lineNumber--;
                    return finalize;
                }
            }

            return finalize;
        }

        // Parses a scenario (or a background, when `token` is a Background
        // token) starting at the current line. Takes ownership of the pending
        // tags. Ids of the tags and of the scenario itself are assigned once
        // the whole scenario has been read; a background gets no id.
        Scenario parseScenario(LineToken token, Feature feature)
        {
            auto line = documentStrings[lineNumber];
            // keyword[0 .. $ - 1] drops the trailing ':' of the keyword.
            auto scenario = new Scenario(token.keyword[0 .. $ - 1], token.text.stripLeft,
                    token.location, feature, token.token == Token.Background);
            if (!tags.empty)
            {
                scenario.tags = tags;
                tags = [];
            }

            void update_ids()
            {
                foreach (i, tag; scenario.tags)
                {
                    scenario.tags[i].id = (id++).to!string;
                }
                if (token.token != Token.Background)
                    scenario.id = (id++).to!string;
            }

            while (++lineNumber < documentStrings.length)
            {
                line = documentStrings[lineNumber];
                auto lineToken = getToken(line, lineNumber);
                switch (lineToken.token)
                {
                case Token.Step:
                    scenario.steps ~= parseStep(lineToken, scenario);
                    break;
                case Token.Examples:
                    scenario.examples ~= parseExamples(lineToken);
                    scenario.isScenarioOutline = true;
                    break;
                case Token.Tag:
                    // Tags seen here are kept pending for the next element
                    // that claims them.
                    parseTag(lineToken);
                    break;
                case Token.Other:
                    scenario.description = parseDescription();
                    break;
                case Token.Comment:
                    document.comments ~= Comment(line, lineToken.location);
                    break;
                case Token.EmptyLine:
                    break;
                default:
                    update_ids();
                    // Not part of this scenario: un-read the line.
                    lineNumber--;
                    return scenario;
                }
            }
            update_ids();
            return scenario;
        }

        // Parses the feature starting at the current line and consumes the rest
        // of the document. Takes ownership of the pending tags; their ids are
        // assigned after everything inside the feature has been parsed.
        //
        // Uses the enclosing `document` directly rather than a by-value
        // parameter, so comments recorded here end up in the returned document.
        Feature parseFeature(LineToken token)
        {
            auto line = documentStrings[lineNumber];
            // keyword[0 .. $ - 1] drops the trailing ':' of the keyword.
            auto feature = new Feature(token.keyword[0 .. $ - 1],
                    token.text.stripLeft, token.location, document);
            if (!tags.empty)
            {
                feature.tags = tags;
                tags = [];
            }

            while (++lineNumber < documentStrings.length)
            {
                line = documentStrings[lineNumber];
                auto lineToken = getToken(line, lineNumber);
                switch (lineToken.token)
                {
                case Token.Background:
                    feature.background = parseScenario(lineToken, feature);
                    break;
                case Token.Scenario:
                    feature.scenarios ~= parseScenario(lineToken, feature);
                    break;
                case Token.Other:
                    feature.description = parseDescription();
                    break;
                case Token.Tag:
                    parseTag(lineToken);
                    break;
                case Token.Comment:
                    document.comments ~= Comment(line, lineToken.location);
                    break;
                case Token.EmptyLine:
                    break;
                default:
                    // do nothing
                }
            }
            foreach (i, tag; feature.tags)
            {
                feature.tags[i].id = (id++).to!string;
            }
            return feature;
        }

        // Top-level loop: handles the language header, tags and comments that
        // precede the feature, then the feature itself. The language defaults
        // to "en" and is stored on the feature when one was found.
        GherkinDocument parseDocument()
        {
            string language = "en";

            while (lineNumber < documentStrings.length)
            {
                auto line = documentStrings[lineNumber];
                auto lineToken = getToken(line, lineNumber);
                switch (lineToken.token)
                {
                case Token.Language:
                    language = lineToken.text.strip;
                    break;
                case Token.Feature:
                    document.feature = parseFeature(lineToken);
                    break;
                case Token.Tag:
                    parseTag(lineToken);
                    break;
                case Token.Comment:
                    document.comments ~= Comment(line, lineToken.location);
                    break;
                case Token.EmptyLine:
                    break;
                default:
                    //do nothing
                }
                lineNumber++;
            }

            if (!document.feature.isNull)
            {
                document.feature.get.language = language;
            }

            return document;
        }

        return parseDocument;
    }

    /**
     * Reads the file at `uri` line by line and parses it.
     *
     * Each line has its terminator removed and every UTF-8 encoded no-break
     * space (bytes C2 A0) replaced by an ordinary space before parsing.
     *
     * Params:
     *   uri = path of the feature file; also stored as the document's URI
     *
     * Returns: the parsed document.
     */
    static GherkinDocument parseFromFile(string uri)
    {
        static string nbsp = "\xc2\xa0";

        auto file = File(uri, "r");
        return parse(file.byLine.map!(x => x.to!string.chomp.replace(nbsp, ` `)).array, uri);
    }

    // Compares the parser output against the expected ASTs shipped with the
    // cucumber test data, skipping feature files listed as ignored.
    unittest
    {
        import std.algorithm : canFind;
        import std.file : readText;
        import std.json : parseJSON;
        import std.path : baseName;
        import std.string : replace;
        import unit_threaded.assertions : should;

        import glob : glob;

        const auto ignoredFeatureFiles = [
            // dfmt off
            // good
            "complex_background",
            "i18n_emoji",
            "i18n_fr",
            "i18n_no",
            "rule",
            "rule_without_name_and_description",
            "spaces_in_language",
            // bad
            "inconsistent_cell_count",
            "invalid_language",
            "multiple_parser_errors",
            "not_gherkin",
            "single_parser_error",
            "unexpected_eof"
            // dfmt on
        ];

        foreach (featureFile; glob(`cucumber/gherkin/testdata/*/*.feature`))
        {
            if (ignoredFeatureFiles.canFind(baseName(featureFile, ".feature")))
            {
                continue;
            }
            immutable auto expected = parseJSON(readText(featureFile ~ `.ast.ndjson`));
            auto actual = parseFromFile(featureFile);

            actual.uri = actual.uri.replace("cucumber/gherkin/", "");
            actual.toJSON.should == expected;
        }
    }
}