module gherkin.parser;

import std.algorithm.searching : startsWith;
import std.array : array, empty, join;
import std.algorithm : each, map;
import std.conv : to;
import std.range : back, popBack, repeat, walkLength;
import std.regex : ctRegex, replace, split;
import std.string : chomp, replace, split, strip, stripLeft;
import std.stdio : File;

import gherkin;

/// Classification of a single source line, as produced by the line tokenizer.
enum Token
{
    Language,
    Feature,
    Scenario,
    ScenarioOutline,
    Background,
    Step,
    Examples,
    DocString,
    /// A non-blank line that starts with none of the known keywords.
    Other,
    TableRow,
    Comment,
    Tag,
    /// A line that is empty or contains only whitespace.
    EmptyLine,
    Null
}

/// Result of tokenizing one line.
struct LineToken
{
    /// Kind of line.
    Token token;
    /// The keyword prefix that matched (e.g. `"Given "`); empty when none matched.
    string keyword;
    /// The rest of the line following the keyword; empty when none matched.
    string text;
    /// 1-based position of the first non-blank character (column 1 for comments).
    Location location;
}

/// Line-oriented parser that turns Gherkin feature text into a `GherkinDocument`.
class Parser
{
    /**
     * Parses a feature given as an array of lines.
     *
     * Params:
     *   documentStrings = the source, one element per line (no line terminators)
     *   uri = identifier stored in the resulting document and its nodes
     *
     * Returns: the parsed document.
     *
     * Throws: `Exception` when a doc string is opened but never closed.
     */
    static GherkinDocument parse(string[] documentStrings, string uri)
    {
        // Shared parser state, captured by the nested helpers below.
        ulong lineNumber; // 0-based index of the line currently being processed
        ulong id; // next sequential node id
        Tag[] tags; // tags collected for the next taggable node
        Comment[] comments; // comments collected for the next node
        auto document = GherkinDocument(uri, documentStrings);

        // Classifies `line` and splits it into keyword and remaining text.
        LineToken getToken(string line, ulong lineNumber)
        {
            static struct Keyword
            {
                string text;
                Token token;
            }

            // Ordered table: entries are tried top to bottom and the first
            // matching prefix wins, so "#language:" must come before "#".
            // (Iterating an associative array here would make that choice
            // depend on unspecified iteration order.)
            static immutable Keyword[] keywords = [
                Keyword("#language:", Token.Language),
                Keyword("Feature:", Token.Feature),
                Keyword("Scenario Outline:", Token.ScenarioOutline),
                Keyword("Scenario:", Token.Scenario),
                Keyword("Example:", Token.Scenario),
                Keyword("Background:", Token.Background),
                Keyword("Examples:", Token.Examples),
                Keyword("Given ", Token.Step),
                Keyword("When ", Token.Step),
                Keyword("Then ", Token.Step),
                Keyword("And ", Token.Step),
                Keyword("But ", Token.Step),
                Keyword("* ", Token.Step),
                Keyword("@", Token.Tag),
                Keyword(`"""`, Token.DocString),
                Keyword("```", Token.DocString),
                Keyword("|", Token.TableRow),
                Keyword("#", Token.Comment)
            ];

            auto strippedLine = line.stripLeft;
            auto indent = line.length - strippedLine.length;
            auto token = Token.Other;
            auto location = Location(indent + 1, lineNumber + 1);
            string text;
            string keyword;

            if (strippedLine.empty)
            {
                token = Token.EmptyLine;
            }
            else
            {
                foreach (candidate; keywords)
                {
                    if (strippedLine.startsWith(candidate.text))
                    {
                        token = candidate.token;
                        keyword = candidate.text;
                        // Slice by code units: the offset must match the
                        // byte-based `indent`, not a code-point count.
                        text = strippedLine[keyword.length .. $];
                        if (token == Token.Comment)
                        {
                            // Comments are always reported at column 1.
                            location.column = 1;
                        }
                        break;
                    }
                }
            }

            return LineToken(token, keyword, text, location);
        }

        // Splits the current line on single spaces and appends every
        // non-empty piece to the pending `tags`, tracking each tag's column.
        void parseTag(LineToken token)
        {
            immutable line = documentStrings[lineNumber];
            immutable strippedLine = line.strip;
            immutable tagStrings = strippedLine.split(" ");

            auto column = token.location.column;
            foreach (tagString; tagStrings)
            {
                if (!tagString.empty)
                {
                    tags ~= Tag(tagString, Location(column, lineNumber + 1));
                }
                // +1 accounts for the separating space.
                column += tagString.walkLength + 1;
            }
        }

        // Reads lines up to the closing separator of a doc string that was
        // opened on the current line. Throws if the input ends first.
        DocString parseDocString(LineToken token)
        {
            string[] content;
            immutable openingLine = lineNumber + 1; // 1-based, for diagnostics
            auto indent = token.location.column - 1;
            auto indentSpaces = ' '.repeat(indent);
            auto separator = token.keyword;
            auto contentType = token.text;
            comments = [];

            while (++lineNumber < documentStrings.length)
            {
                auto line = documentStrings[lineNumber];
                auto lineToken = getToken(line, lineNumber);
                switch (lineToken.token)
                {
                case Token.Comment:
                    comments ~= Comment(line, lineToken.location);
                    document.comments ~= Comment(line, lineToken.location);
                    break;
                case Token.DocString:
                    // Only a bare separator of the same kind closes the block.
                    if (line.stripLeft == separator)
                    {
                        return DocString(content.join("\n"), contentType,
                                separator, token.location);
                    }
                    goto default;
                default:
                    // Remove the opening line's indentation when present,
                    // otherwise all leading whitespace; unescape \" to ".
                    if (line.startsWith(indentSpaces))
                    {
                        content ~= line[indent .. $].replace("\\\"", `"`);
                    }
                    else
                    {
                        content ~= line.stripLeft.replace("\\\"", `"`);
                    }
                }
            }

            // Reaching the end of input means the doc string was never
            // closed. This is a defect in the input, not an impossible
            // state, so report it as a catchable exception.
            throw new Exception(uri ~ ":" ~ openingLine.to!string
                    ~ ": unterminated doc string (missing closing " ~ separator ~ ")");
        }

        // Consumes consecutive table rows (blank lines and comments in
        // between are allowed), starting at the current line. On return,
        // `lineNumber` is the last line that belongs to the table.
        TableRow[] parseTableRows()
        {
            TableRow[] tableRows;
            while (lineNumber < documentStrings.length)
            {
                auto line = documentStrings[lineNumber];
                auto lineToken = getToken(line, lineNumber);
                switch (lineToken.token)
                {
                case Token.TableRow:
                    // Drop the trailing pipe, then split on unescaped pipes.
                    // Element 0 is whatever precedes the first pipe.
                    const auto cellStrings = line.replace(ctRegex!(`\|\s*$`),
                            ``).split(ctRegex!(`(?<!\\)\|`));
                    auto column = cellStrings[0].walkLength + 1;
                    auto row = TableRow((id++).to!string, [], Location(column,
                            lineNumber + 1), comments);
                    foreach (cellString; cellStrings[1 .. $])
                    {
                        string value;
                        string strippedCellString = cellString.strip;
                        ulong i;
                        // Unescape: \n -> newline, \| -> |, \\ -> \ ;
                        // any other escape keeps its backslash.
                        while (i < strippedCellString.length)
                        {
                            auto c = strippedCellString[i].to!string;
                            i++;
                            if (c == `\` && i < strippedCellString.length)
                            {
                                c = strippedCellString[i].to!string;
                                i++;
                                if (c == `n`)
                                {
                                    c = "\n";
                                }
                                else if (c != `|` && c != `\`)
                                {
                                    value ~= "\\";
                                }
                            }
                            value ~= c;
                        }
                        // Cell column = position after the pipe plus the
                        // cell's own leading whitespace.
                        row.cells ~= Cell(value, Location(column + (cellString.walkLength
                                - cellString.stripLeft().walkLength) + 1, lineNumber + 1));
                        column += cellString.walkLength + 1;
                    }
                    tableRows ~= row;
                    break;
                case Token.EmptyLine:
                    break;
                case Token.Comment:
                    comments ~= Comment(line, lineToken.location);
                    document.comments ~= Comment(line, lineToken.location);
                    break;
                default:
                    // Not part of the table: step back so the caller's
                    // `++lineNumber` revisits this line.
                    lineNumber--;
                    return tableRows;
                }

                lineNumber++;
            }
            return tableRows;
        }

        // Builds a step from the current line and attaches a following
        // doc string and/or data table. The step id is assigned last.
        Step parseStep(LineToken token, Scenario scenario)
        {
            Step step = Step(token.keyword, token.text, token.location, scenario.uri, comments);

            while (++lineNumber < documentStrings.length)
            {
                auto line = documentStrings[lineNumber];
                auto lineToken = getToken(line, lineNumber);
                switch (lineToken.token)
                {
                case Token.DocString:
                    step.docString = parseDocString(lineToken);
                    break;
                case Token.TableRow:
                    comments = [];
                    auto dataTable = DataTable(parseTableRows(), lineToken.location);
                    dataTable.rows.each!(r => comments ~= r.comments);
                    step.dataTable = dataTable;
                    break;
                case Token.Comment:
                    comments ~= Comment(line, lineToken.location);
                    document.comments ~= Comment(line, lineToken.location);
                    break;
                case Token.EmptyLine:
                    break;
                default:
                    // Let the caller handle this line.
                    lineNumber--;
                    step.id = (id++).to!string;
                    return step;
                }
            }
            step.id = (id++).to!string;
            return step;
        }

        // Collects a free-text description starting at the current line and
        // returns it with trailing empty lines removed.
        string parseDescription()
        {
            auto line = documentStrings[lineNumber];
            // NOTE(review): the first line is stored verbatim, while the
            // continuation lines below get `\\` replaced by `\` — confirm
            // whether the first line should be unescaped as well.
            string[] descriptions = [line];

            string[] stripTail(string[] descriptions)
            {
                while (!descriptions.empty)
                {
                    if (descriptions.back.length > 0)
                    {
                        break;
                    }
                    descriptions.popBack;
                }
                return descriptions;
            }

            while (++lineNumber < documentStrings.length)
            {
                line = documentStrings[lineNumber];
                auto lineToken = getToken(line, lineNumber);
                switch (lineToken.token)
                {
                case Token.Comment:
                    comments ~= Comment(line, lineToken.location);
                    document.comments ~= Comment(line, lineToken.location);
                    break;
                case Token.EmptyLine:
                case Token.Other:
                    descriptions ~= line.replace("\\\\", `\`);
                    break;
                default:
                    // Let the caller handle this line.
                    lineNumber--;
                    return stripTail(descriptions).join("\n");
                }
            }

            return stripTail(descriptions).join("\n");
        }

        // Builds an Examples node from the current line, taking ownership of
        // the pending tags and comments.
        Examples parseExamples(LineToken token)
        {
            TableRow[] tableRows;
            string description;
            Tag[] examplesTags = tags;
            tags = [];
            Comment[] examplesComments = comments;
            comments = [];

            Examples finalize()
            {
                // keyword[0 .. $ - 1] drops the trailing ':'.
                auto examples = Examples(token.keyword[0 .. $ - 1], token.text.stripLeft,
                        token.location, tableRows, description, examplesComments);
                foreach (tag; examplesTags)
                {
                    tag.id = (id++).to!string;
                    examples.tags ~= tag;
                }

                return examples;
            }

            while (++lineNumber < documentStrings.length)
            {
                auto line = documentStrings[lineNumber];
                auto lineToken = getToken(line, lineNumber);
                switch (lineToken.token)
                {
                case Token.TableRow:
                    tableRows = parseTableRows();
                    break;
                case Token.Comment:
                    comments ~= Comment(line, lineToken.location);
                    document.comments ~= Comment(line, lineToken.location);
                    break;
                case Token.Other:
                    description = parseDescription();
                    break;
                case Token.EmptyLine:
                    break;
                default:
                    // Let the caller handle this line.
                    lineNumber--;
                    return finalize();
                }
            }

            return finalize();
        }

        // Builds a Scenario, Scenario Outline or Background from the current
        // line, taking ownership of the pending tags.
        Scenario parseScenario(LineToken token, Feature feature, bool isScenarioOutline = false)
        {
            // keyword[0 .. $ - 1] drops the trailing ':'.
            auto scenario = new Scenario(token.keyword[0 .. $ - 1], token.text.stripLeft,
                    token.location, token.token == Token.Background, feature.uri, comments);
            scenario.isScenarioOutline = isScenarioOutline;
            scenario.tags = tags;
            tags = [];

            // Ids are handed out once the body has been read: first the
            // tags, then the scenario itself (a Background receives none).
            void update_ids()
            {
                foreach (i, tag; scenario.tags)
                {
                    scenario.tags[i].id = (id++).to!string;
                }
                if (token.token != Token.Background)
                    scenario.id = (id++).to!string;
            }

            while (++lineNumber < documentStrings.length)
            {
                auto line = documentStrings[lineNumber];
                auto lineToken = getToken(line, lineNumber);
                switch (lineToken.token)
                {
                case Token.Step:
                    scenario.steps ~= parseStep(lineToken, scenario);
                    break;
                case Token.Examples:
                    scenario.examples ~= parseExamples(lineToken);
                    scenario.isScenarioOutline = true;
                    break;
                case Token.Tag:
                    parseTag(lineToken);
                    break;
                case Token.Other:
                    scenario.description = parseDescription();
                    break;
                case Token.Comment:
                    comments ~= Comment(line, lineToken.location);
                    document.comments ~= Comment(line, lineToken.location);
                    break;
                case Token.EmptyLine:
                    break;
                default:
                    update_ids();
                    // Let the caller handle this line.
                    lineNumber--;
                    return scenario;
                }
            }
            update_ids();
            return scenario;
        }

        // Builds the Feature from the current line and consumes the rest of
        // the input.
        //
        // The second parameter is deliberately NOT named `document`: it is
        // received by value, and a parameter of that name would shadow the
        // enclosing `document`, so the comment append below would go to a
        // copy and be missing from the returned document (assuming
        // GherkinDocument is a struct, as its construction in `parse`
        // suggests).
        Feature parseFeature(LineToken token, GherkinDocument owner)
        {
            // keyword[0 .. $ - 1] drops the trailing ':'.
            auto feature = new Feature(token.keyword[0 .. $ - 1],
                    token.text.stripLeft, token.location, owner.uri, comments);
            feature.tags = tags;
            tags = [];

            while (++lineNumber < documentStrings.length)
            {
                auto line = documentStrings[lineNumber];
                auto lineToken = getToken(line, lineNumber);
                switch (lineToken.token)
                {
                case Token.Background:
                    feature.background = parseScenario(lineToken, feature);
                    break;
                case Token.Scenario:
                    feature.scenarios ~= parseScenario(lineToken, feature);
                    break;
                case Token.ScenarioOutline:
                    feature.scenarios ~= parseScenario(lineToken, feature, true);
                    break;
                case Token.Other:
                    feature.description = parseDescription();
                    break;
                case Token.Tag:
                    parseTag(lineToken);
                    break;
                case Token.Comment:
                    comments ~= Comment(line, lineToken.location);
                    document.comments ~= Comment(line, lineToken.location);
                    break;
                case Token.EmptyLine:
                    break;
                default:
                    // do nothing
                }
            }
            foreach (i, tag; feature.tags)
            {
                feature.tags[i].id = (id++).to!string;
            }
            return feature;
        }

        // Top-level loop: handles the language header, leading tags and
        // comments, and the feature itself.
        GherkinDocument parseDocument()
        {
            string language = "en";

            while (lineNumber < documentStrings.length)
            {
                auto line = documentStrings[lineNumber];
                auto lineToken = getToken(line, lineNumber);
                switch (lineToken.token)
                {
                case Token.Language:
                    language = lineToken.text.strip;
                    break;
                case Token.Feature:
                    document.feature = parseFeature(lineToken, document);
                    break;
                case Token.Tag:
                    parseTag(lineToken);
                    break;
                case Token.Comment:
                    comments ~= Comment(line, lineToken.location);
                    document.comments ~= Comment(line, lineToken.location);
                    break;
                case Token.EmptyLine:
                    break;
                default:
                    //do nothing
                }
                lineNumber++;
            }

            if (!document.feature.isNull)
            {
                document.feature.get.language = language;
            }

            return document;
        }

        return parseDocument();
    }

    /**
     * Reads the file at `uri` and parses it.
     *
     * Each line has its trailing line terminator removed and every UTF-8
     * no-break space (bytes C2 A0) replaced by an ordinary space before
     * parsing.
     *
     * Params:
     *   uri = path of the feature file; also stored as the document uri
     *
     * Returns: the parsed document.
     */
    static GherkinDocument parseFromFile(string uri)
    {
        enum nbsp = "\xc2\xa0";

        auto file = File(uri, "r");
        return parse(file.byLine.map!(x => x.to!string.chomp.replace(nbsp, ` `)).array, uri);
    }

    // Compares the parser output with the reference ASTs shipped with the
    // cucumber/gherkin test data, skipping features that are not supported.
    unittest
    {
        import asdf : serializeToJson;
        import std.algorithm : canFind;
        import std.file : readText;
        import std.json : parseJSON;
        import std.path : baseName, dirName;
        import std.string : replace;
        import unit_threaded.assertions : should;

        const auto ignoredFeatureFiles = [
            // dfmt off
            // good
            "complex_background",
            "i18n_emoji",
            "i18n_fr",
            "i18n_no",
            "rule",
            "rule_without_name_and_description",
            "spaces_in_language",
            // bad
            "inconsistent_cell_count",
            "invalid_language",
            "multiple_parser_errors",
            "not_gherkin",
            "single_parser_error",
            "unexpected_eof"
            // dfmt on
        ];

        foreach (featureFile; getFeatureFiles([
                    ``, __FILE__.dirName ~ "/../../cucumber/gherkin/testdata/"
                ]))
        {
            if (ignoredFeatureFiles.canFind(baseName(featureFile, ".feature")))
            {
                continue;
            }
            auto expected = parseJSON(readText(featureFile ~ `.ast.ndjson`));
            auto actual = parseFromFile(featureFile);

            actual.uri = actual.uri.replace(__FILE__.dirName ~ "/../../cucumber/gherkin/", "");
            parseJSON(actual.serializeToJson).should == expected["gherkinDocument"];
        }
    }
}