1 module gherkin.parser;
2 
3 import std.algorithm.searching : startsWith;
4 import std.array : array, empty, join;
5 import std.algorithm : each, map;
6 import std.conv : to;
7 import std.range : back, popBack, repeat, walkLength;
8 import std.regex : ctRegex, replace, split;
9 import std.string : chomp, replace, split, strip, stripLeft;
10 import std.stdio : File;
11 
12 import gherkin;
13 
/// Kind of line recognised by the parser's line tokenizer (leading whitespace is ignored).
enum Token
{
    /// Line starting with `#language:`.
    Language,
    /// Line starting with `Feature:`.
    Feature,
    /// Line starting with `Scenario:` or `Example:`.
    Scenario,
    /// Line starting with `Scenario Outline:`.
    ScenarioOutline,
    /// Line starting with `Background:`.
    Background,
    /// Line starting with a step keyword (`Given `, `When `, `Then `, `And `, `But ` or `* `).
    Step,
    /// Line starting with `Examples:`.
    Examples,
    /// Line starting with a DocString separator (three double quotes or three backticks).
    DocString,
    /// Non-blank line that starts with none of the known keywords.
    Other,
    /// Line starting with `|`.
    TableRow,
    /// Line starting with `#`.
    Comment,
    /// Line starting with `@`.
    Tag,
    /// Line that is empty or consists only of whitespace.
    EmptyLine,
    /// Never produced by the tokenizer in this module.
    Null
}
32 
/// Result of tokenizing one source line: what kind of line it is and where it sits.
struct LineToken
{
    /// Kind of line that was recognised.
    Token token;

    /// Keyword prefix that matched; left empty when no keyword matched.
    string keyword;

    /// Remainder of the line that follows the keyword.
    string text;

    /// Position reported for this token.
    Location location;
}
45 
46 ///
47 class Parser
48 {
49     ///
50     static GherkinDocument parse(string[] documentStrings, string uri)
51     {
52         ulong lineNumber;
53         ulong id;
54         Tag[] tags;
55         Comment[] comments;
56         auto document = GherkinDocument(uri, documentStrings);
57 
58         LineToken getToken(string line, ulong lineNumber)
59         {
60             immutable Token[string] tokenStrings = [
61                 "#language:" : Token.Language, "Feature:" : Token.Feature,
62                 "Scenario:" : Token.Scenario, "Example:" : Token.Scenario,
63                 "Scenario Outline:" : Token.ScenarioOutline,
64                 "Background:" : Token.Background, "Given " : Token.Step,
65                 "When " : Token.Step, "Then " : Token.Step, "And " : Token.Step,
66                 "But " : Token.Step, "* " : Token.Step, "@" : Token.Tag,
67                 "Examples:" : Token.Examples, "#" : Token.Comment,
68                 `"""` : Token.DocString, "```" : Token.DocString,
69                 "|" : Token.TableRow
70             ];
71 
72             auto strippedLine = line.stripLeft;
73             auto indent = line.length - strippedLine.length;
74             auto token = Token.Other;
75             auto location = Location(indent + 1, lineNumber + 1);
76             string text;
77             string keyword;
78 
79             if (line.strip.length == 0)
80             {
81                 token = token.EmptyLine;
82             }
83             else
84             {
85                 foreach (t; tokenStrings.byKeyValue)
86                 {
87                     if (strippedLine.startsWith(t.key))
88                     {
89                         token = t.value;
90                         keyword = t.key;
91                         text = line[indent + keyword.walkLength .. $];
92                         if (token == Token.Comment)
93                         {
94                             location.column = 1;
95                         }
96                         break;
97                     }
98                 }
99             }
100 
101             return LineToken(token, keyword, text, location);
102         }
103 
104         void parseTag(LineToken token)
105         {
106             immutable auto line = documentStrings[lineNumber];
107             immutable auto strippedLine = line.strip;
108             immutable auto tagStrings = strippedLine.split(" ");
109 
110             auto column = token.location.column;
111             foreach (tagString; tagStrings)
112             {
113                 if (!tagString.empty)
114                 {
115                     tags ~= Tag(tagString, Location(column, lineNumber + 1));
116                 }
117                 column += tagString.walkLength + 1;
118             }
119         }
120 
121         DocString parseDocString(LineToken token)
122         {
123             string[] content;
124             auto line = documentStrings[lineNumber];
125             auto indent = token.location.column - 1;
126             auto indentSpaces = ' '.repeat(token.location.column - 1);
127             auto separator = token.keyword;
128             auto contentType = token.text;
129             comments = [];
130 
131             while (++lineNumber < documentStrings.length)
132             {
133                 line = documentStrings[lineNumber];
134                 auto lineToken = getToken(line, lineNumber);
135                 switch (lineToken.token)
136                 {
137                 case Token.Comment:
138                     comments ~= Comment(line, lineToken.location);
139                     document.comments ~= Comment(line, lineToken.location);
140                     break;
141                 case Token.DocString:
142                     if (line.stripLeft == separator)
143                     {
144                         return DocString(content.join("\n"), contentType,
145                                 separator, token.location);
146                     }
147                     goto default;
148                 default:
149                     if (line.startsWith(indentSpaces))
150                     {
151                         content ~= line[indent .. $].replace("\\\"", `"`);
152                     }
153                     else
154                     {
155                         content ~= line.stripLeft.replace("\\\"", `"`);
156                     }
157                 }
158             }
159             assert(0);
160         }
161 
162         TableRow[] parseTableRows()
163         {
164             TableRow[] tableRows;
165             while (lineNumber < documentStrings.length)
166             {
167                 auto line = documentStrings[lineNumber];
168                 auto lineToken = getToken(line, lineNumber);
169                 switch (lineToken.token)
170                 {
171                 case Token.TableRow:
172                     const auto cellStrings = line.replace(ctRegex!(`\|\s*$`),
173                             ``).split(ctRegex!(`(?<!\\)\|`));
174                     auto column = cellStrings[0].walkLength + 1;
175                     auto row = TableRow((id++).to!string, [], Location(column,
176                             lineNumber + 1), comments);
177                     foreach (cellString; cellStrings[1 .. $])
178                     {
179                         string value;
180                         string strippedCellString = cellString.strip;
181                         ulong i;
182                         while (i < strippedCellString.length)
183                         {
184                             auto c = strippedCellString[i].to!string;
185                             i++;
186                             if (c == `\` && i < strippedCellString.length)
187                             {
188                                 c = strippedCellString[i].to!string;
189                                 i++;
190                                 if (c == `n`)
191                                 {
192                                     c = "\n";
193                                 }
194                                 else if (c != `|` && c != `\`)
195                                 {
196                                     value ~= "\\";
197                                 }
198                             }
199                             value ~= c;
200                         }
201                         row.cells ~= Cell(value, Location(column + (cellString.walkLength - cellString.stripLeft()
202                                 .walkLength) + 1, lineNumber + 1));
203                         column += cellString.walkLength + 1;
204                     }
205                     tableRows ~= row;
206                     break;
207                 case Token.EmptyLine:
208                     break;
209                 case Token.Comment:
210                     comments ~= Comment(line, lineToken.location);
211                     document.comments ~= Comment(line, lineToken.location);
212                     break;
213                 default:
214                     lineNumber--;
215                     return tableRows;
216                 }
217 
218                 lineNumber++;
219             }
220             return tableRows;
221         }
222 
223         Step parseStep(LineToken token, Scenario scenario)
224         {
225             auto line = documentStrings[lineNumber];
226             Step step = Step(token.keyword, token.text, token.location, scenario.uri, comments);
227 
228             while (++lineNumber < documentStrings.length)
229             {
230                 line = documentStrings[lineNumber];
231                 auto lineToken = getToken(line, lineNumber);
232                 switch (lineToken.token)
233                 {
234                 case Token.DocString:
235                     step.docString = parseDocString(lineToken);
236                     break;
237                 case Token.TableRow:
238                     comments = [];
239                     auto dataTable = DataTable(parseTableRows(), lineToken.location);
240                     dataTable.rows.each!(r => comments ~= r.comments);
241                     step.dataTable = dataTable;
242                     break;
243                 case Token.Comment:
244                     comments ~= Comment(line, lineToken.location);
245                     document.comments ~= Comment(line, lineToken.location);
246                     break;
247                 case Token.EmptyLine:
248                     break;
249                 default:
250                     lineNumber--;
251                     step.id = (id++).to!string;
252                     return step;
253                 }
254             }
255             step.id = (id++).to!string;
256             return step;
257         }
258 
259         string parseDescription()
260         {
261             auto line = documentStrings[lineNumber];
262             string[] descriptions = [line];
263 
264             string[] stripTail(string[] descriptions)
265             {
266                 while (!descriptions.empty)
267                 {
268                     if (descriptions.back.length > 0)
269                     {
270                         break;
271                     }
272                     descriptions.popBack;
273                 }
274                 return descriptions;
275             }
276 
277             while (++lineNumber < documentStrings.length)
278             {
279                 line = documentStrings[lineNumber];
280                 auto lineToken = getToken(line, lineNumber);
281                 switch (lineToken.token)
282                 {
283                 case Token.Comment:
284                     comments ~= Comment(line, lineToken.location);
285                     document.comments ~= Comment(line, lineToken.location);
286                     break;
287                 case Token.EmptyLine:
288                 case Token.Other:
289                     descriptions ~= line.replace("\\\\", `\`);
290                     break;
291                 default:
292                     lineNumber--;
293                     return stripTail(descriptions).join("\n");
294                 }
295             }
296 
297             return stripTail(descriptions).join("\n");
298         }
299 
300         Examples parseExamples(LineToken token)
301         {
302             auto line = documentStrings[lineNumber];
303             TableRow[] tableRows;
304             string description;
305             Tag[] examplesTags = tags;
306             tags = [];
307             Comment[] examplesComments = comments;
308             comments = [];
309 
310             Examples finalize()
311             {
312                 auto examples = Examples(token.keyword[0 .. $ - 1], token.text.stripLeft,
313                         token.location, tableRows, description, examplesComments);
314                 foreach (i, tag; examplesTags)
315                 {
316                     tag.id = (id++).to!string;
317                     examples.tags ~= tag;
318                 }
319 
320                 return examples;
321             }
322 
323             while (++lineNumber < documentStrings.length)
324             {
325                 line = documentStrings[lineNumber];
326                 auto lineToken = getToken(line, lineNumber);
327                 switch (lineToken.token)
328                 {
329                 case Token.TableRow:
330                     tableRows = parseTableRows();
331                     break;
332                 case Token.Comment:
333                     comments ~= Comment(line, lineToken.location);
334                     document.comments ~= Comment(line, lineToken.location);
335                     break;
336                 case Token.Other:
337                     description = parseDescription();
338                     break;
339                 case Token.EmptyLine:
340                     break;
341                 default:
342                     lineNumber--;
343                     return finalize;
344                 }
345             }
346 
347             return finalize;
348         }
349 
350         Scenario parseScenario(LineToken token, Feature feature, bool isScenarioOutline = false)
351         {
352             auto line = documentStrings[lineNumber];
353             auto scenario = new Scenario(token.keyword[0 .. $ - 1], token.text.stripLeft,
354                     token.location, token.token == Token.Background, feature.uri, comments);
355             scenario.isScenarioOutline = isScenarioOutline;
356             scenario.tags = tags;
357             tags = [];
358 
359             void update_ids()
360             {
361                 foreach (i, tag; scenario.tags)
362                 {
363                     scenario.tags[i].id = (id++).to!string;
364                 }
365                 if (token.token != Token.Background)
366                     scenario.id = (id++).to!string;
367             }
368 
369             while (++lineNumber < documentStrings.length)
370             {
371                 line = documentStrings[lineNumber];
372                 auto lineToken = getToken(line, lineNumber);
373                 switch (lineToken.token)
374                 {
375                 case Token.Step:
376                     scenario.steps ~= parseStep(lineToken, scenario);
377                     break;
378                 case Token.Examples:
379                     scenario.examples ~= parseExamples(lineToken);
380                     scenario.isScenarioOutline = true;
381                     break;
382                 case Token.Tag:
383                     parseTag(lineToken);
384                     break;
385                 case Token.Other:
386                     scenario.description = parseDescription();
387                     break;
388                 case Token.Comment:
389                     comments ~= Comment(line, lineToken.location);
390                     document.comments ~= Comment(line, lineToken.location);
391                     break;
392                 case Token.EmptyLine:
393                     break;
394                 default:
395                     update_ids();
396                     lineNumber--;
397                     return scenario;
398                 }
399             }
400             update_ids();
401             return scenario;
402         }
403 
404         Feature parseFeature(LineToken token, GherkinDocument document)
405         {
406             auto line = documentStrings[lineNumber];
407             auto feature = new Feature(token.keyword[0 .. $ - 1],
408                     token.text.stripLeft, token.location, document.uri, comments);
409             feature.tags = tags;
410             tags = [];
411 
412             while (++lineNumber < documentStrings.length)
413             {
414                 line = documentStrings[lineNumber];
415                 auto lineToken = getToken(line, lineNumber);
416                 switch (lineToken.token)
417                 {
418                 case Token.Background:
419                     feature.background = parseScenario(lineToken, feature);
420                     break;
421                 case Token.Scenario:
422                     feature.scenarios ~= parseScenario(lineToken, feature);
423                     break;
424                 case Token.ScenarioOutline:
425                     feature.scenarios ~= parseScenario(lineToken, feature, true);
426                     break;
427                 case Token.Other:
428                     feature.description = parseDescription();
429                     break;
430                 case Token.Tag:
431                     parseTag(lineToken);
432                     break;
433                 case Token.Comment:
434                     comments ~= Comment(line, lineToken.location);
435                     document.comments ~= Comment(line, lineToken.location);
436                     break;
437                 case Token.EmptyLine:
438                     break;
439                 default:
440                     // do nothing
441                 }
442             }
443             foreach (i, tag; feature.tags)
444             {
445                 feature.tags[i].id = (id++).to!string;
446             }
447             return feature;
448         }
449 
450         GherkinDocument parseDocument()
451         {
452             string language = "en";
453 
454             while (lineNumber < documentStrings.length)
455             {
456                 auto line = documentStrings[lineNumber];
457                 auto lineToken = getToken(line, lineNumber);
458                 switch (lineToken.token)
459                 {
460                 case Token.Language:
461                     language = lineToken.text.strip;
462                     break;
463                 case Token.Feature:
464                     document.feature = parseFeature(lineToken, document);
465                     break;
466                 case Token.Tag:
467                     parseTag(lineToken);
468                     break;
469                 case Token.Comment:
470                     comments ~= Comment(line, lineToken.location);
471                     document.comments ~= Comment(line, lineToken.location);
472                     break;
473                 case Token.EmptyLine:
474                     break;
475                 default:
476                     //do nothing
477                 }
478                 lineNumber++;
479             }
480 
481             if (!document.feature.isNull)
482             {
483                 document.feature.get.language = language;
484             }
485 
486             return document;
487         }
488 
489         return parseDocument;
490     }
491 
492     ///
493     static GherkinDocument parseFromFile(string uri)
494     {
495         static string nbsp = "\xc2\xa0";
496 
497         auto file = File(uri, "r");
498         return parse(file.byLine.map!(x => x.to!string.chomp.replace(nbsp, ` `)).array, uri);
499     }
500 
501     unittest
502     {
503         import asdf : serializeToJson;
504         import std.algorithm : canFind;
505         import std.file : readText;
506         import std.json : parseJSON;
507         import std.path : baseName, dirName;
508         import std.string : replace;
509         import unit_threaded.assertions : should;
510 
511         const auto ignoredFeatureFiles = [
512             // dfmt off
513             // good
514             "complex_background",
515             "i18n_emoji",
516             "i18n_fr",
517             "i18n_no",
518             "rule",
519             "rule_without_name_and_description",
520             "spaces_in_language",
521             // bad
522             "inconsistent_cell_count",
523             "invalid_language",
524             "multiple_parser_errors",
525             "not_gherkin",
526             "single_parser_error",
527             "unexpected_eof"
528             // dfmt on
529         ];
530 
531         foreach (featureFile; getFeatureFiles([
532                     ``, __FILE__.dirName ~ "/../../cucumber/gherkin/testdata/"
533                 ]))
534         {
535             if (ignoredFeatureFiles.canFind(baseName(featureFile, ".feature")))
536             {
537                 continue;
538             }
539             auto expected = parseJSON(readText(featureFile ~ `.ast.ndjson`));
540             auto actual = parseFromFile(featureFile);
541 
542             actual.uri = actual.uri.replace(__FILE__.dirName ~ "/../../cucumber/gherkin/", "");
543             parseJSON(actual.serializeToJson).should == expected["gherkinDocument"];
544         }
545     }
546 }