1 module gherkin.parser;
2 
import std.algorithm : map;
import std.algorithm.searching : startsWith;
import std.array : array, empty, join;
import std.conv : to;
import std.range : back, popBack, repeat, walkLength;
import std.regex : ctRegex, replace, split;
import std.stdio : File;
import std.string : chomp, replace, split, strip, stripLeft;
import std.typecons : Nullable;
12 
13 import gherkin;
14 
/**
 * Classification of a single source line, as decided by the prefix the line
 * starts with once leading whitespace is removed.
 *
 * The members keep their implicit, declaration-order values.
 */
enum Token
{
    /// Line starting with `#language:`.
    Language,
    /// Line starting with `Feature:`.
    Feature,
    /// Line starting with `Scenario:`, `Example:` or `Scenario Outline:`.
    Scenario,
    /// Line starting with `Background:`.
    Background,
    /// Line starting with a step keyword (`Given `, `When `, `Then `, `And `, `But `, `* `).
    Step,
    /// Line starting with `Examples:`.
    Examples,
    /// Line starting with a DocString separator (three double quotes or three backticks).
    DocString,
    /// Non-blank line that starts with none of the known prefixes.
    Other,
    /// Line starting with `|`.
    TableRow,
    /// Line starting with `#`.
    Comment,
    /// Line starting with `@`.
    Tag,
    /// Line that is empty or contains only whitespace.
    EmptyLine,
    /// Placeholder value; not produced by the tokenizer in this module.
    Null
}
32 
/**
 * Result of tokenizing one source line.
 *
 * Field order is significant: the parser builds values positionally as
 * `LineToken(token, keyword, text, location)`.
 */
struct LineToken
{
    /// Kind of line that was recognised.
    Token token;

    /// The prefix that matched (for example `Feature:` or `Given `); empty when none matched.
    string keyword;

    /// Remainder of the line following `keyword`; empty when no prefix matched.
    string text;

    /// Position associated with the token in the source document.
    Location location;
}
45 
/**
 * Hand-written, line-oriented parser that turns the text of a Gherkin
 * feature file into a `GherkinDocument`.
 */
class Parser
{
    /**
     * Parses a feature file that has already been split into lines.
     *
     * Params:
     *     documentStrings = the document, one array element per line
     *     uri = identifier of the document; handed to `GherkinDocument` and
     *           used in error messages
     *
     * Returns: the populated `GherkinDocument`.
     *
     * Throws: `Exception` when a DocString is opened but never closed.
     */
    static GherkinDocument parse(string[] documentStrings, string uri)
    {
        // State shared by every nested parse function below.
        ulong lineNumber; // zero-based index of the line currently being processed
        ulong id; // next value handed out as an `id` string
        Tag[] tags; // tags collected so far that are not yet attached to anything
        auto document = GherkinDocument(uri, documentStrings);

        // Classifies `line` by its leading keyword and records where it starts.
        // `lineNumber` is zero-based; the resulting location is one-based.
        LineToken getToken(string line, ulong lineNumber)
        {
            static struct KeywordEntry
            {
                string keyword;
                Token token;
            }

            // Scanned front to back, first match wins. `#language:` must stay
            // ahead of `#`, because a language header also starts with `#`.
            // An associative array cannot be used here: its iteration order
            // is unspecified, which would make that choice depend on hashing.
            static immutable KeywordEntry[] keywordTable = [
                KeywordEntry("#language:", Token.Language),
                KeywordEntry("Feature:", Token.Feature),
                KeywordEntry("Scenario Outline:", Token.Scenario),
                KeywordEntry("Scenario:", Token.Scenario),
                KeywordEntry("Example:", Token.Scenario),
                KeywordEntry("Background:", Token.Background),
                KeywordEntry("Examples:", Token.Examples),
                KeywordEntry("Given ", Token.Step),
                KeywordEntry("When ", Token.Step),
                KeywordEntry("Then ", Token.Step),
                KeywordEntry("And ", Token.Step),
                KeywordEntry("But ", Token.Step),
                KeywordEntry("* ", Token.Step),
                KeywordEntry("@", Token.Tag),
                KeywordEntry(`"""`, Token.DocString),
                KeywordEntry("```", Token.DocString),
                KeywordEntry("|", Token.TableRow),
                KeywordEntry("#", Token.Comment)
            ];

            auto strippedLine = line.stripLeft;
            auto indent = line.length - strippedLine.length;
            auto token = Token.Other;
            auto location = Location(indent + 1, lineNumber + 1);
            string text;
            string keyword;

            // A line is blank exactly when nothing is left after stripping
            // its leading whitespace.
            if (strippedLine.empty)
            {
                return LineToken(Token.EmptyLine, keyword, text, location);
            }

            foreach (entry; keywordTable)
            {
                if (!strippedLine.startsWith(entry.keyword))
                {
                    continue;
                }
                token = entry.token;
                keyword = entry.keyword;
                // Slices are indexed in code units, so use `.length` here.
                text = line[indent + keyword.length .. $];
                if (token == Token.Comment)
                {
                    // Comments are always reported at column 1.
                    location.column = 1;
                }
                break;
            }

            return LineToken(token, keyword, text, location);
        }

        // Splits the current line into tags and appends them to `tags`.
        // They are attached to the next feature, scenario or examples block.
        void parseTag(LineToken token)
        {
            immutable line = documentStrings[lineNumber];
            immutable strippedLine = line.strip;
            immutable tagStrings = strippedLine.split(" ");

            auto column = token.location.column;
            foreach (tagString; tagStrings)
            {
                // Consecutive spaces yield empty pieces; they only advance the column.
                if (!tagString.empty)
                {
                    tags ~= Tag(tagString, Location(column, lineNumber + 1));
                }
                column += tagString.walkLength + 1;
            }
        }

        // Reads a DocString whose opening separator is on the current line and
        // leaves `lineNumber` on the closing separator.
        // Throws `Exception` when the input ends before the separator recurs.
        DocString parseDocString(LineToken token)
        {
            string[] content;
            immutable openingLine = lineNumber + 1;
            auto indent = token.location.column - 1;
            auto indentSpaces = ' '.repeat(token.location.column - 1);
            auto separator = token.keyword;
            auto contentType = token.text;

            while (++lineNumber < documentStrings.length)
            {
                auto line = documentStrings[lineNumber];

                // Only a line consisting of exactly the opening separator
                // closes the DocString. Everything else, including lines that
                // start with `#`, is content.
                if (line.stripLeft == separator)
                {
                    return DocString(content.join("\n"), contentType,
                            separator, token.location);
                }

                if (line.startsWith(indentSpaces))
                {
                    // Drop the indentation of the opening separator.
                    content ~= line[indent .. $].replace("\\\"", `"`);
                }
                else
                {
                    // Less indented than the separator: drop all leading whitespace.
                    content ~= line.stripLeft.replace("\\\"", `"`);
                }
            }

            throw new Exception(uri ~ ": DocString opened on line "
                    ~ openingLine.to!string ~ " is never closed");
        }

        // Reads consecutive table rows starting at the current line. Blank
        // lines are skipped and comment lines are recorded on the document.
        // On return `lineNumber` is on the last line that was consumed.
        TableRow[] parseTableRows()
        {
            TableRow[] tableRows;
            while (lineNumber < documentStrings.length)
            {
                auto line = documentStrings[lineNumber];
                auto lineToken = getToken(line, lineNumber);
                switch (lineToken.token)
                {
                case Token.TableRow:
                    // Remove the closing pipe, then split on unescaped pipes.
                    // Element 0 is whatever precedes the first pipe.
                    const auto cellStrings = line.replace(ctRegex!(`\|\s*$`),
                            ``).split(ctRegex!(`(?<!\\)\|`));
                    auto column = cellStrings[0].walkLength + 1;
                    auto row = TableRow((id++).to!string, [], Location(column, lineNumber + 1));
                    foreach (cellString; cellStrings[1 .. $])
                    {
                        string value;
                        string strippedCellString = cellString.strip;
                        size_t i;
                        while (i < strippedCellString.length)
                        {
                            auto c = strippedCellString[i].to!string;
                            i++;
                            if (c == `\` && i < strippedCellString.length)
                            {
                                // Escapes: \n becomes a newline, \| and \\ become
                                // the bare character, anything else keeps its backslash.
                                c = strippedCellString[i].to!string;
                                i++;
                                if (c == `n`)
                                {
                                    c = "\n";
                                }
                                else if (c != `|` && c != `\`)
                                {
                                    value ~= "\\";
                                }
                            }
                            value ~= c;
                        }
                        // The cell is located at its first non-blank character.
                        auto leadingBlanks = cellString.walkLength - cellString.stripLeft()
                            .walkLength;
                        row.cells ~= Cell(value, Location(column + leadingBlanks + 1,
                                lineNumber + 1));
                        column += cellString.walkLength + 1;
                    }
                    tableRows ~= row;
                    break;
                case Token.EmptyLine:
                    break;
                case Token.Comment:
                    document.comments ~= Comment(line, lineToken.location);
                    break;
                default:
                    // Not part of the table: hand the line back to the caller.
                    lineNumber--;
                    return tableRows;
                }

                lineNumber++;
            }
            return tableRows;
        }

        // Builds the step on the current line together with an optional
        // DocString or data table that follows it.
        Step parseStep(LineToken token, Scenario parent)
        {
            Step step = Step(token.keyword, token.text, token.location, parent);

            while (++lineNumber < documentStrings.length)
            {
                auto line = documentStrings[lineNumber];
                auto lineToken = getToken(line, lineNumber);
                switch (lineToken.token)
                {
                case Token.DocString:
                    step.docString = parseDocString(lineToken);
                    break;
                case Token.TableRow:
                    step.dataTable = DataTable(parseTableRows(),
                            lineToken.location);
                    break;
                case Token.Comment:
                    document.comments ~= Comment(line, lineToken.location);
                    break;
                case Token.EmptyLine:
                    break;
                default:
                    // Hand the line back to the caller. The id is assigned
                    // last so that table rows receive their ids first.
                    lineNumber--;
                    step.id = (id++).to!string;
                    return step;
                }
            }
            step.id = (id++).to!string;
            return step;
        }

        // Collects free-text lines starting at the current line and joins
        // them with "\n" after removing trailing zero-length lines.
        string parseDescription()
        {
            // NOTE(review): the first line is stored verbatim while later lines
            // have `\\` collapsed to `\`; confirm whether that is intended.
            string[] descriptions = [documentStrings[lineNumber]];

            // Removes zero-length lines from the end of `lines`.
            string[] stripTail(string[] lines)
            {
                while (!lines.empty && lines.back.length == 0)
                {
                    lines.popBack;
                }
                return lines;
            }

            while (++lineNumber < documentStrings.length)
            {
                auto line = documentStrings[lineNumber];
                auto lineToken = getToken(line, lineNumber);
                switch (lineToken.token)
                {
                case Token.Comment:
                    document.comments ~= Comment(line, lineToken.location);
                    break;
                case Token.EmptyLine:
                case Token.Other:
                    descriptions ~= line.replace("\\\\", `\`);
                    break;
                default:
                    lineNumber--;
                    return stripTail(descriptions).join("\n");
                }
            }

            return stripTail(descriptions).join("\n");
        }

        // Builds an `Examples` block from the current line, taking ownership
        // of any pending tags.
        Examples parseExamples(LineToken token)
        {
            TableRow[] tableRows;
            Nullable!string description;
            Tag[] examplesTags;
            if (!tags.empty)
            {
                examplesTags = tags;
                tags = [];
            }

            // Assembles the result: the first table row becomes the header
            // and the remaining rows the body.
            Examples finalize()
            {
                Nullable!TableRow tableHeader;
                TableRow[] tableBody;
                if (!tableRows.empty)
                {
                    tableHeader = tableRows[0];
                    if (tableRows.length > 1)
                    {
                        tableBody = tableRows[1 .. $];
                    }
                }
                // `[0 .. $ - 1]` removes the trailing colon of the keyword.
                auto examples = Examples(token.keyword[0 .. $ - 1],
                        token.text.stripLeft, token.location, tableHeader, tableBody);
                foreach (tag; examplesTags)
                {
                    tag.id = (id++).to!string;
                    examples.tags ~= tag;
                }
                if (!description.isNull)
                {
                    examples.description = description;
                }

                return examples;
            }

            while (++lineNumber < documentStrings.length)
            {
                auto line = documentStrings[lineNumber];
                auto lineToken = getToken(line, lineNumber);
                switch (lineToken.token)
                {
                case Token.TableRow:
                    tableRows = parseTableRows();
                    break;
                case Token.Comment:
                    document.comments ~= Comment(line, lineToken.location);
                    break;
                case Token.Other:
                    description = parseDescription();
                    break;
                case Token.EmptyLine:
                    break;
                default:
                    lineNumber--;
                    return finalize;
                }
            }

            return finalize;
        }

        // Builds a scenario, or a background when the token says so, from the
        // current line. Pending tags are attached to it.
        Scenario parseScenario(LineToken token, Feature feature)
        {
            // `[0 .. $ - 1]` removes the trailing colon of the keyword.
            auto scenario = new Scenario(token.keyword[0 .. $ - 1], token.text.stripLeft,
                    token.location, feature, token.token == Token.Background);
            if (!tags.empty)
            {
                scenario.tags = tags;
                tags = [];
            }

            // Ids are assigned once the body is complete, after those of the
            // contained steps and examples. A background receives no id.
            void update_ids()
            {
                foreach (i, tag; scenario.tags)
                {
                    scenario.tags[i].id = (id++).to!string;
                }
                if (token.token != Token.Background)
                {
                    scenario.id = (id++).to!string;
                }
            }

            while (++lineNumber < documentStrings.length)
            {
                auto line = documentStrings[lineNumber];
                auto lineToken = getToken(line, lineNumber);
                switch (lineToken.token)
                {
                case Token.Step:
                    scenario.steps ~= parseStep(lineToken, scenario);
                    break;
                case Token.Examples:
                    scenario.examples ~= parseExamples(lineToken);
                    scenario.isScenarioOutline = true;
                    break;
                case Token.Tag:
                    parseTag(lineToken);
                    break;
                case Token.Other:
                    scenario.description = parseDescription();
                    break;
                case Token.Comment:
                    document.comments ~= Comment(line, lineToken.location);
                    break;
                case Token.EmptyLine:
                    break;
                default:
                    update_ids();
                    lineNumber--;
                    return scenario;
                }
            }
            update_ids();
            return scenario;
        }

        // Builds the feature from the current line and consumes the rest of
        // the input. `owner` is passed to the `Feature` constructor.
        //
        // The parameter is deliberately not called `document`: comments must
        // go to the enclosing `document` that `parse` returns. If
        // `GherkinDocument` is a value type, a same-named parameter would be
        // a copy and comments appended to it would be lost.
        Feature parseFeature(LineToken token, GherkinDocument owner)
        {
            // `[0 .. $ - 1]` removes the trailing colon of the keyword.
            auto feature = new Feature(token.keyword[0 .. $ - 1],
                    token.text.stripLeft, token.location, owner);
            if (!tags.empty)
            {
                feature.tags = tags;
                tags = [];
            }

            while (++lineNumber < documentStrings.length)
            {
                auto line = documentStrings[lineNumber];
                auto lineToken = getToken(line, lineNumber);
                switch (lineToken.token)
                {
                case Token.Background:
                    feature.background = parseScenario(lineToken, feature);
                    break;
                case Token.Scenario:
                    feature.scenarios ~= parseScenario(lineToken, feature);
                    break;
                case Token.Other:
                    feature.description = parseDescription();
                    break;
                case Token.Tag:
                    parseTag(lineToken);
                    break;
                case Token.Comment:
                    document.comments ~= Comment(line, lineToken.location);
                    break;
                case Token.EmptyLine:
                    break;
                default:
                    // Any other line is ignored at feature level.
                    break;
                }
            }
            foreach (i, tag; feature.tags)
            {
                feature.tags[i].id = (id++).to!string;
            }
            return feature;
        }

        // Top-level loop: handles the language header, leading tags and
        // comments, and the feature itself.
        GherkinDocument parseDocument()
        {
            string language = "en";

            while (lineNumber < documentStrings.length)
            {
                auto line = documentStrings[lineNumber];
                auto lineToken = getToken(line, lineNumber);
                switch (lineToken.token)
                {
                case Token.Language:
                    language = lineToken.text.strip;
                    break;
                case Token.Feature:
                    document.feature = parseFeature(lineToken, document);
                    break;
                case Token.Tag:
                    parseTag(lineToken);
                    break;
                case Token.Comment:
                    document.comments ~= Comment(line, lineToken.location);
                    break;
                case Token.EmptyLine:
                    break;
                default:
                    // Any other line is ignored at document level.
                    break;
                }
                lineNumber++;
            }

            if (!document.feature.isNull)
            {
                document.feature.get.language = language;
            }

            return document;
        }

        return parseDocument;
    }

    /**
     * Reads the file at `uri` and parses its contents.
     *
     * Each line has its trailing line terminator removed and every
     * no-break space (U+00A0) replaced by an ordinary space before parsing.
     *
     * Params:
     *     uri = path of the feature file; also used as the document URI
     *
     * Returns: the parsed `GherkinDocument`.
     *
     * Throws: whatever `std.stdio.File` throws when the file cannot be
     *         opened or read, and whatever `parse` throws.
     */
    static GherkinDocument parseFromFile(string uri)
    {
        // UTF-8 encoding of U+00A0 NO-BREAK SPACE.
        enum string nbsp = "\xc2\xa0";

        auto file = File(uri, "r");
        return parse(file.byLine.map!(x => x.to!string.chomp.replace(nbsp, ` `)).array, uri);
    }

    // Parses every feature file of the cucumber test data that is not on the
    // ignore list and compares the result with the reference AST next to it.
    unittest
    {
        import std.algorithm : canFind;
        import std.file : readText;
        import std.json : parseJSON;
        import std.path : baseName;
        import std.string : replace;
        import unit_threaded.assertions : should;

        import glob : glob;

        const ignoredFeatureFiles = [
            // dfmt off
            // good
            "complex_background",
            "i18n_emoji",
            "i18n_fr",
            "i18n_no",
            "rule",
            "rule_without_name_and_description",
            "spaces_in_language",
            // bad
            "inconsistent_cell_count",
            "invalid_language",
            "multiple_parser_errors",
            "not_gherkin",
            "single_parser_error",
            "unexpected_eof"
            // dfmt on
        ];

        foreach (featureFile; glob(`cucumber/gherkin/testdata/*/*.feature`))
        {
            if (ignoredFeatureFiles.canFind(baseName(featureFile, ".feature")))
            {
                continue;
            }
            immutable expected = parseJSON(readText(featureFile ~ `.ast.ndjson`));
            auto actual = parseFromFile(featureFile);

            // The reference files hold URIs without the checkout prefix.
            actual.uri = actual.uri.replace("cucumber/gherkin/", "");
            actual.toJSON.should == expected;
        }
    }
}