iXML Community Group Test Suite

21 Jun 2022 (28 Jun 2022)

Top-level catalog for tests in the iXML Community Group Test Suite.

Tests have been contributed from several sources, but the core of the test collection consists of the tests contributed by Steven Pemberton in December 2021.

Misc tests 3

21 Jun 2022

Grammars 41-60.

Tests compiled manually in 2018 and 2019, re-packaged and extended (supplying test cases where needed) in 2022.

Note that some tests have alternate results for processors operating in non-standard modes, in particular modes in which they tolerate multiple definitions and undefined nonterminals or in which they do not tolerate non-productive nonterminals or unreachable nonterminals.

For a description of the form in which alternate results are recorded, see tests/misc-grammar/test-catalog.xml.

sample.grammar.41

Created 08 Feb 2022 by cmsmcq

Sample grammar from Niklaus Wirth, Grundlagen und Techniken des Compilerbaus (Bonn: Addison-Wesley, 1996), pp. 36-37.

Grammar for Oberon-0, the subset of Oberon for which the book describes a compiler.

Rules for whitespace and comments added for usability; Wirth assumes a lexer to eat whitespace. This grammar experiments with a pattern in which nonterminals normally begin and end with non-whitespace characters, so in the XML any whitespace goes outside the tags, not inside. (A different style is shown in sample.grammar.41bis.)

(For what it's worth: in some ways the raw parse tree produced by this grammar is nicer than the raw parse tree produced by grammar 41bis, but optional nonterminals and nonterminals which can produce the empty string cause ambiguity unless they are handled specially.)

Invisible XML Grammar

{ Sample grammar from Niklaus Wirth, Grundlagen und Techniken des
Compilerbaus (Bonn: Addison-Wesley, 1996), pp. 36-37.

Oberon-0.

Rules for ws added for convenience; Wirth assumes the lexer
eats whitespace. }

{ Revisions:
  2022-06-15 : CMSMcQ : tweaks to align output with grammar 41bis
  2022-06-14 : CMSMcQ : revise S handling (tight tags, where possible).
                        Hide most literals, use pre-terminal symbols.
  2018-08-10 : CMSMcQ : correct syntax errors found by DCG parser
  2018-08-09 : CMSMcQ : made first transcription; needs testing, since
      the translation from Wirth's EBNF is error-prone.
}

{ We move the rule for module to the front, since it is the
start symbol. }

            { A module is the keyword MODULE, the module name, a semicolon,
              the declarations, an optional BEGIN-introduced statement
              sequence, and END followed by the module name again and a
              full stop.  The opening name is serialized as the attribute
              'ident' and the closing name as the attribute 'ident_close';
              nothing in the grammar requires the two names to be equal.
              The keywords and the full stop are marked '-' and therefore
              do not appear in the output. }
            module = S?, -"MODULE", S, @ident, SEMI,
                     declarations,
                     (-"BEGIN", StatementSequence)?,
                     -"END", S, @ident_close, S?, -".", S?.
    
             { An identifier is a letter followed by any number of letters
               and digits.  ident_close has exactly the same content as
               ident; it is used in the rule for module for the closing
               name, presumably so that the closing name can be serialized
               under a name of its own.  An integer is one or more
               digits. }
             ident = letter, (letter; digit)*.
       ident_close = ident.
           integer = digit, (digit)*.
          { A selector is a sequence of one or more steps, each either a
            field access (member: '.' and an identifier) or an array
            subscript (an expression in square brackets).  Whitespace
            between steps is optional.  The punctuation is marked '-', so
            only the member and subscript elements and their content are
            serialized.  The commented-out rule after subscript is an
            alternative single-rule formulation kept for reference. }
          selector = (member; subscript)++(S?).
	    member = -".", S?, ident.
	 subscript = -"[", S?, expression, S?, -"]".
         {selector = (".", S?, ident; "[", S?, expression, S?, "]")++S.}
            { number wraps integer.  A factor is an identifier with an
              optional selector, a number, a parenthesized expression, or
              '~' applied to a factor.  The parentheses are marked '-' and
              are dropped from the output; the '~' literal is not marked
              and so is kept as text inside the factor element. }
            number = integer.
            factor = ident, (S?, selector)?
	           ; number
		   ; -"(", S?, expression, S?, -")"
		   ; "~", S?, factor
		   .
            { A term is one or more factors separated by one of the
              operators TIMES, DIV, MOD, AND, with optional whitespace on
              either side of the operator. }
              term = factor++(S?, (TIMES; DIV; MOD; AND), S?).
	      
  { A SimpleExpression is an optional leading sign followed by one or
    more terms separated by PLUS, MINUS or OR, with optional whitespace
    on either side of the operator. }
  SimpleExpression =  (PLUS; MINUS)?, term++(S?, (PLUS; MINUS; OR), S?).

    { N.B. SimpleExpression allows 1, -1, 1 + 1,
      -1 + 1, but not 1 + -1.
      I expect that's intentional.
      I've made it also forbid white space between sign and term.
    }    
 
        { An expression is a SimpleExpression, optionally followed by a
          comparison operator and a second SimpleExpression.  COMPARATOR
          is marked '-' both here and on its own rule, so no COMPARATOR
          element is produced; the element for the specific operator
          (EQL, NEQ, LSS, LEQ, GTR or GEQ) appears directly as a child of
          expression. }
        expression = SimpleExpression, (S?, -COMPARATOR, S?, SimpleExpression)?.
       -COMPARATOR = EQL; NEQ; LSS; LEQ; GTR; GEQ.
        { An assignment is an identifier with an optional selector, the
          operator ':=' (dropped from the output) and an expression.
          ActualParameters is a parenthesized list of zero or more
          expressions separated by commas; parentheses and commas are
          dropped from the output.  A ProcedureCall is an identifier with
          an optional selector and optional ActualParameters, so a bare
          identifier is a complete procedure call. }
        assignment = ident, S?, (selector, S?)?, -":=", S?, expression.
  ActualParameters = -"(", S?, expression**(S?, -",", S?), S?, -")".
     ProcedureCall = ident, (S?, selector)?, (S?, ActualParameters)?.
       { An IfStatement is IF condition THEN statements, followed by any
         number of ELSIF condition THEN statements parts, an optional
         ELSE part, and END.  All keywords are marked '-' and are dropped
         from the output, so the branches are distinguished only by the
         condition, then-stmts and else-stmts elements.  Each of those
         three rules has a single child marked '-': the wrapper element
         takes the place of the expression or StatementSequence element
         and directly contains that element's children. }
       IfStatement = -"IF", S?, condition, S?, 
                     -"THEN", then-stmts,
                     (-"ELSIF", S?, condition, S?, 
		     -"THEN", then-stmts)*,
                     (-"ELSE", else-stmts)?,
		     -"END".
         condition = -expression.
        then-stmts = -StatementSequence.
        else-stmts = -StatementSequence.
    { A WhileStatement is WHILE condition DO statements END.  The keywords
      are dropped from the output.  do-statements takes the place of the
      StatementSequence element, whose own element is suppressed by the
      '-' mark. }
    WhileStatement = -"WHILE", S?, condition, S?,
                     -"DO", do-statements,
		     -"END".
     do-statements = -StatementSequence.

         { A statement is optional leading whitespace followed, optionally,
           by one of the four statement kinds and optional trailing
           whitespace.  Everything in the rule is optional, so a statement
           may be empty.  Because statement absorbs the whitespace around
           itself, the separator in StatementSequence is a bare ';'
           (dropped from the output) rather than SEMI. }
         statement = S?, ((assignment
			 ; ProcedureCall
			 ; IfStatement
			 ; WhileStatement), S?)?.
 StatementSequence = statement++ -";".
 
 { Wirth's formulation is closer to the following, but that complicates whitespace
   handling. 

         statement = (assignment; ProcedureCall; IfStatement; WhileStatement)?.
 StatementSequence = statement++SEMI.
 }

         { IdentList is one or more identifiers separated by commas;
           whitespace is allowed after a comma but not before it.
           ArrayType is ARRAY, a length expression, OF and an element
           type.  FieldList is an IdentList, a colon and a type, and may
           be empty.  RecordType is RECORD, one or more FieldLists
           separated by semicolons, and END; unlike most rules here it
           also accepts trailing whitespace after its last keyword.
           A type is an identifier, an ArrayType or a RecordType.
           Keywords and commas are dropped from the output. }
         IdentList = ident++(-",", S?).
         ArrayType = -"ARRAY", S?, expression, S?, -"OF", S?, type.
         FieldList = (IdentList, COLON, type)?.
        RecordType = -"RECORD", S?, FieldList++SEMI, S?, -"END", S?.
              type = ident
	           ; ArrayType
		   ; RecordType
		   .
         { FPSection is one group of formal parameters: an optional VAR
           keyword, an IdentList, a colon and a type.  FormalParameters is
           a parenthesized list of zero or more FPSections separated by
           semicolons.  ProcedureHeading is PROCEDURE, the procedure name
           (serialized as a child element ident) and optional
           FormalParameters.  ProcedureBody is the declarations, an
           optional BEGIN-introduced statement sequence, and END followed
           by a name that is serialized as the attribute 'ident' of
           ProcedureBody; nothing in the grammar requires that name to
           equal the name in the heading.  A ProcedureDeclaration is a
           heading, a semicolon and a body. }
         FPSection = (-"VAR", S)?, IdentList, COLON, type.
  FormalParameters = -"(", S?, FPSection**SEMI, S?, -")".
  ProcedureHeading = -"PROCEDURE", S, ident, S?, FormalParameters?.
     ProcedureBody = declarations, 
                     (-"BEGIN", StatementSequence)?,
		     -"END", S, @ident.
ProcedureDeclaration
                   = ProcedureHeading, SEMI, ProcedureBody.
      { declarations is up to four sections in a fixed order: constants,
        types, variables, procedures.  Each section is optional, so
        declarations may be empty.  The constants, types and variables
        sections each consist of a keyword (dropped from the output)
        optionally followed by whitespace and one or more definitions;
        every definition ends with ';' and consecutive definitions must
        be separated by whitespace or a comment (S is required there,
        not optional).  procedures is one or more procedure declarations,
        each followed by ';', likewise separated by S. }
      declarations = (constants, S?)?, (types, S?)?, (variables, S?)?, (procedures, S?)?.
         constants = -"CONST", (S, (ident, EQUALS, expression, S?, -";")++S)?.
             types = -"TYPE", (S, (IdentList, EQUALS, type, S?, -";")++S)?.
         variables = -"VAR", (S, (IdentList, COLON, type, S?, -";")++S)?.
        procedures = (ProcedureDeclaration, S?, -";")++S.

{ S and comment added here to make the grammar usable without
  a scanner. N.B. Comments nest. }
                { S is one or more whitespace characters or comments.
                  S and ws are marked '-' and the whitespace characters
                  themselves are deleted, but comment is not marked, so
                  every comment is serialized as a comment element
                  wherever S occurs.  The accepted whitespace characters
                  are space, tab (#09), line feed (#0A) and carriage
                  return (#0D). }
                -S = (ws; comment)+.
               -ws = -[" "; #09; #0A; #0D].

           { A comment is delimited by '(*' and '*)', both dropped from
             the output.  comment-body must begin with comment-chars,
             and every alternative of comment-chars consumes at least
             one character, so the body is never empty.  Nested comments
             may follow; each must be preceded by comment-chars and
             consecutive nested comments must have comment-chars between
             them.
             cc1 is one character other than '*' and '('.
             cc2 is one or more '(' followed by such a character.
             cc3 is one or more '*' followed either by one or more '('
             and such a character, or by one character that is none of
             '(', '*' and ')'.
             NOTE(review): these rules appear designed to keep the
             delimiters '(*' and '*)' out of comment-chars; confirm
             against the intended behaviour before changing them. }
           comment = -"(*", comment-body, -"*)".
      comment-body = comment-chars, ((comment++comment-chars), comment-chars?)?.
    -comment-chars = (cc1; cc2; cc3)+, star*, lpar* | star+, lpar* | lpar+.
              -cc1 = not-star-or-lpar.
              -cc2 = lpar+, not-star-or-lpar.
	      -cc3 = star+, lpar+, not-star-or-lpar
                   | star+, not-star-or-lrpar.
 -not-star-or-lpar = ~["*("].
-not-star-or-lrpar = ~["(*)"].
             -lpar = "(".
	     -star = "*".

            { Character classes used by ident and integer: ASCII decimal
              digits and ASCII letters of either case.  Both rules are
              marked '-', so the matched characters become text of the
              enclosing element with no digit or letter element. }
            -digit = ['0'-'9'].
           -letter = ['a'-'z'; 'A'-'Z'].

             { Operator pre-terminals.  Each nonterminal is unmarked while
               its literal is marked '-', so each operator is serialized
               as an empty element named after the operator (TIMES, DIV,
               ...) and the operator's spelling does not appear in the
               output.  TIMES through OR are used in term and
               SimpleExpression; EQL through GEQ are the alternatives of
               COMPARATOR. }
             TIMES = -"*".
	       DIV = -"DIV".
	       MOD = -"MOD".
	       AND = -"AND".
	      PLUS = -"+".
	     MINUS = -"-".
	        OR = -"OR".
               EQL = -"=".
	       NEQ = -"#".
	       LSS = -"<".
	       LEQ = -"<=".
	       GTR = -">".
	       GEQ = -">=".

{ Separators }
             { Each separator is a punctuation character with optional
               whitespace or comments on either side.  The rules and
               their literals are all marked '-', so a separator
               contributes nothing to the output except the comment
               elements for any comments in the surrounding S. }
             -SEMI = S?, -";", S?.
            -COLON = S?, -":", S?.
           -EQUALS = S?, -"=", S?.

Test case: g41.c08

Repository URI: …/tests/misc/misc-041-060-catalog.xml

Created 15 Jun 2022 by cmsmcq

Error case generated by random modification of g41.c00.

In the BinSearch routine, the test x < a[k] was modified by changing the identifier 'k' to a subscript (so it became x < a[[k]]).

Input string (891 characters)

MODULE Samples;
(* Wirth 1995, pp. 37-38 *)

PROCEDURE Multiply;
  VAR x, y, z: INTEGER;
BEGIN Read(x); Read(y); z := 0;
  WHILE x > 0 DO
    IF x MOD 2 = 1 THEN z := z + y END ;
    y := 2*y; x := x DIV 2
  END ;
  Write(x); Write(y); Write(z); WriteLn
END Multiply;

PROCEDURE Divide;
  VAR x, y, r, q, w: INTEGER;
BEGIN Read(x); Read(y); r := x; q := 0; w := y;
  WHILE w <= r DO w := 2*w END ;
  WHILE w > y DO
    q := 2*q; w := w DIV 2;
    IF w <= r THEN r := r - w; q := q + 1 END
  END ;
  Write(x); Write(y); Write(q); Write(r); WriteLn
END Divide;

PROCEDURE BinSearch;
  VAR i, j, k, n: INTEGER;
    a: ARRAY 32 OF INTEGER;
BEGIN Read(n); k := 0;
  WHILE k < n DO Read(a[k]); k := k + 1 END ;
  i := 0; j := n;
  WHILE i < j DO
    k := (i+j) DIV 2;
    IF x < a[[k]] THEN j := k ELSE i := k + 1 END
  END ;
  Write(i); Write(j); Write(a[j]); WriteLn
END BinSearch;

END Samples.

Expected result

The input does not match the grammar.