iXML Community Group Test Suite
23 Oct 2023 (22 Nov 2023)
Top-level catalog for tests in the iXML Community Group Test Suite.
Tests have been contributed from several sources, but the core of the test collection is the set of tests contributed by Steven Pemberton in December 2021.
Misc tests 3
28 Jun 2022
Grammars 41-60.
Tests compiled manually in 2018 and 2019, re-packaged and extended (supplying test cases where needed) in 2022.
Note that some tests have alternate results for processors operating in non-standard modes, in particular modes in which they tolerate multiple definitions and undefined nonterminals or in which they do not tolerate non-productive nonterminals or unreachable nonterminals.
For a description of the form in which alternate results are recorded, see tests/misc-grammar/test-catalog.xml.
sample.grammar.41
Created 08 Feb 2022 by cmsmcq
Sample grammar from Niklaus Wirth, Grundlagen und Techniken des Compilerbaus (Bonn: Addison-Wesley, 1996), pp. 36-37.
Grammar for Oberon-0, the subset of Oberon for which the book describes a compiler.
Rules for whitespace and comments added for usability; Wirth assumes a lexer to eat whitespace. This grammar experiments with a pattern in which nonterminals normally begin and end with non-whitespace characters, so in the XML any whitespace goes outside the tags, not inside. (A different style is shown in sample.grammar.41bis.)
(For what it's worth: in some ways the raw parse tree produced by this grammar is nicer than the raw parse tree produced by grammar 41bis, but optional nonterminals and nonterminals which can produce the empty string cause ambiguity unless they are handled specially.)
Invisible XML Grammar
{ Sample grammar from Niklaus Wirth, Grundlagen und Techniken des
Compilerbaus (Bonn: Addison-Wesley, 1996), pp. 36-37.
Oberon-0.
Rules for ws added for convenience; Wirth assumes the lexer
eats whitespace. }
{ Revisions:
2022-06-15 : CMSMcQ : tweaks to align output with grammar 41bis
2022-06-14 : CMSMcQ : revise S handling (tight tags, where possible).
Hide most literals, use pre-terminal symbols.
2018-08-10 : CMSMcQ : correct syntax errors found by DCG parser
2018-08-09 : CMSMcQ : made first transcription; needs testing, since
the translation from Wirth's EBNF is error-prone.
}
{ We move the rule for module to the front, since it is the
start symbol. }
{ module: the keyword literals and the final "." carry the "-" mark and
are omitted from the output. The opening and closing names carry the
"@" mark and are serialized as the attributes ident and ident_close.
Nothing in this rule requires the two names to be equal. }
module = S?, -"MODULE", S, @ident, SEMI,
declarations,
(-"BEGIN", StatementSequence)?,
-"END", S, @ident_close, S?, -".", S?.
{ ident: a letter followed by any number of letters and digits. }
ident = letter, (letter; digit)*.
{ ident_close: same content as ident under a different name; it is used
as @ident_close in the module rule, presumably so that the closing
name does not collide with the @ident attribute. }
ident_close = ident.
{ integer: one or more digits. }
integer = digit, (digit)*.
{ selector: one or more member or subscript parts, optionally separated
by whitespace. The "." and the square brackets are hidden. }
selector = (member; subscript)++(S?).
member = -".", S?, ident.
subscript = -"[", S?, expression, S?, -"]".
{ The next line is a commented-out single-rule formulation of selector;
it appears to be an earlier version kept for reference. }
{selector = (".", S?, ident; "[", S?, expression, S?, "]")++S.}
{ number: wrapper around integer. }
number = integer.
{ factor: an identifier with optional selector, a number, a
parenthesized expression, or "~" followed by a factor. The parentheses
are hidden; the "~" is not marked and so stays in the output. }
factor = ident, (S?, selector)?
; number
; -"(", S?, expression, S?, -")"
; "~", S?, factor
.
{ term: one or more factors joined by TIMES, DIV, MOD or AND, with
optional whitespace on either side of each operator. }
term = factor++(S?, (TIMES; DIV; MOD; AND), S?).
{ SimpleExpression: optional leading PLUS or MINUS, then one or more
terms joined by PLUS, MINUS or OR. }
SimpleExpression = (PLUS; MINUS)?, term++(S?, (PLUS; MINUS; OR), S?).
{ N.B. SimpleExpression allows 1, -1, 1 + 1,
-1 + 1, but not 1 + -1.
I expect that's intentional.
I've made it also forbid white space between sign and term.
}
{ expression: one SimpleExpression, optionally followed by a comparison
operator and a second SimpleExpression. COMPARATOR is hidden, so the
operator element (EQL, NEQ, ...) appears directly inside expression. }
expression = SimpleExpression, (S?, -COMPARATOR, S?, SimpleExpression)?.
-COMPARATOR = EQL; NEQ; LSS; LEQ; GTR; GEQ.
{ assignment: identifier with optional selector, the ":=" sign (hidden),
and an expression. }
assignment = ident, S?, (selector, S?)?, -":=", S?, expression.
{ ActualParameters: parenthesized list of zero or more expressions
separated by commas; parentheses and commas are hidden.
NOTE(review): when the list is empty the two S? are adjacent, so
whitespace between the parentheses can be parsed in more than one
way. }
ActualParameters = -"(", S?, expression**(S?, -",", S?), S?, -")".
{ ProcedureCall: identifier with optional selector and optional
parameter list. }
ProcedureCall = ident, (S?, selector)?, (S?, ActualParameters)?.
{ IfStatement: IF condition THEN statements, any number of
ELSIF condition THEN statements parts, an optional ELSE part, and END.
All keywords are hidden. No S? follows THEN or ELSE here; each
statement begins with its own S?. }
IfStatement = -"IF", S?, condition, S?,
-"THEN", then-stmts,
(-"ELSIF", S?, condition, S?,
-"THEN", then-stmts)*,
(-"ELSE", else-stmts)?,
-"END".
{ condition, then-stmts, else-stmts: named wrappers. The wrapped
nonterminal is hidden, so its children appear directly inside the
wrapper element. }
condition = -expression.
then-stmts = -StatementSequence.
else-stmts = -StatementSequence.
{ WhileStatement: WHILE condition DO statements END; keywords hidden. }
WhileStatement = -"WHILE", S?, condition, S?,
-"DO", do-statements,
-"END".
do-statements = -StatementSequence.
{ statement: optional leading whitespace, then optionally one of the
four statement kinds followed by optional whitespace. A statement may
therefore be empty. }
statement = S?, ((assignment
; ProcedureCall
; IfStatement
; WhileStatement), S?)?.
{ StatementSequence: one or more statements separated by ";" (hidden).
Whitespace around the semicolon is consumed by the statement rule. }
StatementSequence = statement++ -";".
{ Wirth's formulation is closer to the following, but that complicates whitespace
handling.
statement = (assignment; ProcedureCall; IfStatement; WhileStatement)?.
StatementSequence = statement++SEMI.
}
{ IdentList: one or more identifiers separated by commas (hidden).
Whitespace is allowed after a comma but not before it. }
IdentList = ident++(-",", S?).
{ ArrayType: ARRAY expression OF type; keywords hidden. }
ArrayType = -"ARRAY", S?, expression, S?, -"OF", S?, type.
{ FieldList: names, a colon and a type; the whole list is optional, so
RecordType accepts empty entries between semicolons. }
FieldList = (IdentList, COLON, type)?.
{ RecordType: RECORD, field lists separated by semicolons, END;
keywords hidden. }
RecordType = -"RECORD", S?, FieldList++SEMI, S?, -"END", S?.
{ type: a type name, an array type or a record type. }
type = ident
; ArrayType
; RecordType
.
{ FPSection: optional VAR (followed by mandatory whitespace), then
names, a colon and a type. }
FPSection = (-"VAR", S)?, IdentList, COLON, type.
{ FormalParameters: parenthesized list of zero or more FPSections
separated by semicolons; parentheses hidden. }
FormalParameters = -"(", S?, FPSection**SEMI, S?, -")".
{ ProcedureHeading: PROCEDURE, mandatory whitespace, the procedure
name, and an optional formal parameter list. }
ProcedureHeading = -"PROCEDURE", S, ident, S?, FormalParameters?.
{ ProcedureBody: declarations, an optional BEGIN part, then END and the
closing name, which is serialized as the attribute ident. Nothing in
the grammar requires the closing name to match the name in the
heading. }
ProcedureBody = declarations,
(-"BEGIN", StatementSequence)?,
-"END", S, @ident.
ProcedureDeclaration
= ProcedureHeading, SEMI, ProcedureBody.
{ declarations: four optional sections in fixed order, each optionally
followed by whitespace. }
declarations = (constants, S?)?, (types, S?)?, (variables, S?)?, (procedures, S?)?.
{ constants, types, variables: a hidden keyword, optionally followed by
whitespace-separated entries, each ended by a hidden ";". Note that
types takes an IdentList, not a single ident, to the left of "=". }
constants = -"CONST", (S, (ident, EQUALS, expression, S?, -";")++S)?.
types = -"TYPE", (S, (IdentList, EQUALS, type, S?, -";")++S)?.
variables = -"VAR", (S, (IdentList, COLON, type, S?, -";")++S)?.
{ procedures: one or more procedure declarations, each ended by a
hidden ";" and separated from the next by mandatory whitespace. }
procedures = (ProcedureDeclaration, S?, -";")++S.
{ S and comment added here to make the grammar usable without
a scanner. N.B. Comments nest. }
{ S is hidden and ws hides its characters, so plain whitespace leaves no
trace in the output. comment is not hidden and appears as an
element. }
-S = (ws; comment)+.
-ws = -[" "; #09; #0A; #0D].
{ comment: the "(*" and "*)" delimiters are hidden. }
comment = -"(*", comment-body, -"*)".
{ comment-body: begins with comment-chars, optionally followed by nested
comments separated by comment-chars, with optional trailing
comment-chars. Every alternative of comment-chars matches at least one
character, so a comment body cannot be empty and cannot begin with a
nested comment. }
comment-body = comment-chars, ((comment++comment-chars), comment-chars?)?.
{ comment-chars and its helpers cc1, cc2, cc3: runs of comment text.
NOTE(review): these appear designed to exclude the two-character
sequences that open or close a comment; verify against the intended
comment syntax before changing them. }
-comment-chars = (cc1; cc2; cc3)+, star*, lpar* | star+, lpar* | lpar+.
-cc1 = not-star-or-lpar.
-cc2 = lpar+, not-star-or-lpar.
-cc3 = star+, lpar+, not-star-or-lpar
| star+, not-star-or-lrpar.
{ Exclusion sets: any character other than the ones listed. }
-not-star-or-lpar = ~["*("].
-not-star-or-lrpar = ~["(*)"].
-lpar = "(".
-star = "*".
{ Character classes. Both nonterminals are hidden, so the matched
characters become text of the enclosing element. }
-digit = ['0'-'9'].
-letter = ['a'-'z'; 'A'-'Z'].
{ Operator symbols. Each is a visible nonterminal whose only content is
a hidden literal, so each is serialized as an empty element named
after the operator. }
TIMES = -"*".
DIV = -"DIV".
MOD = -"MOD".
AND = -"AND".
PLUS = -"+".
MINUS = -"-".
OR = -"OR".
EQL = -"=".
NEQ = -"#".
LSS = -"<".
LEQ = -"<=".
GTR = -">".
GEQ = -">=".
{ Separators }
{ Each separator is a hidden nonterminal with a hidden literal and
optional whitespace on both sides, so nothing of it appears in the
output. }
-SEMI = S?, -";", S?.
-COLON = S?, -":", S?.
-EQUALS = S?, -"=", S?.
Test case: g41.c02
Repository URI: …/tests/misc/misc-041-060-catalog.xml
Created 15 Jun 2022 by cmsmcq
Error case generated by random modification of g41.c01.
In the Divide routine, the assignment w := 2 * w was modified by replacing the number 2 with an instance of the nonterminal 'types' (part of 'declarations').
Input string (935 characters)
MODULE Samples;
(* Wirth 1995, pp. 37-38 *)
PROCEDURE Multiply;
VAR x, y, z: INTEGER;
BEGIN Read(x); Read(y); z := 0;
WHILE x > 0 DO
IF x MOD 2 = 1 THEN z := z + y END ;
y := 2*y; x := x DIV 2
END ;
Write(x); Write(y); Write(z); WriteLn
END Multiply;
PROCEDURE Divide;
VAR x, y, r, q, w: INTEGER;
BEGIN Read(x); Read(y); r := x; q := 0; w := y;
WHILE w <= r DO w := TYPE bogon, bogotic = RECORD x, y: INTEGER END *w END ;
WHILE w > y DO
q := 2*q; w := w DIV 2;
IF w <= r THEN r := r - w; q := q + 1 END
END ;
Write(x); Write(y); Write(q); Write(r); WriteLn
END Divide;
PROCEDURE BinSearch;
VAR i, j, k, n: INTEGER;
a: ARRAY 32 OF INTEGER;
BEGIN Read(n); k := 0;
WHILE k < n DO Read(a[k]); k := k + 1 END ;
i := 0; j := n;
WHILE i < j DO
k := (i+j) DIV 2;
IF x < a[k] THEN j := k ELSE i := k + 1 END
END ;
Write(i); Write(j); Write(a[j]); WriteLn
END BinSearch;
END Samples.
Expected result
The input does not match the grammar.