iXML Community Group Test Suite

01 Jun 2022 (03 Jun 2022)

Top-level catalog for tests in the iXML Community Group Test Suite.

Tests have been contributed from several sources, but the core of the test collection consists of the tests contributed by Steven Pemberton in December 2021.

ixml tests

01 Jun 2022

Tests provided by Steven Pemberton in December 2021, with corrections of 21 December. Reorganized by Norm Tovey-Walsh, February 2022.

ixml

Created 16 Dec 2021 by SP

Invisible XML Grammar

         { Grammar for ixml grammars in the notation where a separated
           repetition is written factor*sep or factor+sep (see repeat0 and
           repeat1). The text of this grammar itself uses the ** and ++ forms.
           A rule or a nonterminal reference may carry one of the marks
           @, ^ or - (see mark); a terminal may carry only ^ or - (see tmark). }
         ixml: s, rule++s, s.

         { Whitespace and comments. A comment may contain further comments. }
           -s: (whitespace; comment)*.
  -whitespace: -[Zs]; tab; lf; cr.
         -tab: -#9.
          -lf: -#a.
          -cr: -#d.
      comment: -"{", (cchar; comment)*, -"}".
       -cchar: ~["{}"].

         { Rules, alternatives, terms and factors. Either = or : separates a
           rule name from its body; either ; or | separates alternatives. }
         rule: (mark, s)?, name, s, -["=:"], s, -alts, -".".
        @mark: ["@^-"].
         alts: alt++(-[";|"], s).
          alt: term**(-",", s).
        -term: factor;
               option;
               repeat0;
               repeat1.
      -factor: terminal;
               nonterminal;
               -"(", s, alts, -")", s.
      repeat0: factor, -"*", s, sep?. {written factor*sep; the sep part is optional}
      repeat1: factor, -"+", s, sep?. {written factor+sep; the sep part is optional}
       option: factor, -"?", s.
          sep: factor.
  nonterminal: (mark, s)?, name, s.

         { Terminals: quoted strings, #hex encoded characters and character sets. }
    -terminal: literal; 
               charset.
      literal: quoted;
               encoded.
      -quoted: (tmark, s)?, -string.

         { Names start with an underscore or a character of class L. }
        @name: namestart, namefollower*.
   -namestart: ["_"; L].
-namefollower: namestart; ["-.·‿⁀"; Nd; Mn].

         { Strings may use either quote character as delimiter and must
           contain at least one character. }
       @tmark: ["^-"].
       string: -'"', dstring, -'"', s;
               -"'", sstring, -"'", s.
     @dstring: dchar+.
     @sstring: schar+.
        dchar: ~['"'; #a; #d];
               '"', -'"'. {all characters except line breaks; quotes must be doubled}
        schar: ~["'"; #a; #d];
               "'", -"'". {all characters except line breaks; quotes must be doubled}
     -encoded: (tmark, s)?, -"#", @hex, s.
          hex: ["0"-"9"; "a"-"f"; "A"-"F"]+.

         { Character sets: a bracketed list of members, optionally preceded
           by ~ for an exclusion. Members are separated by ; or |. }
     -charset: inclusion; 
               exclusion.
    inclusion: (tmark, s)?,          set.
    exclusion: (tmark, s)?, -"~", s, set.
         -set: -"[", s,  member**(-[";|"], s), -"]", s.
      -member: literal;
               range;
               class.
        range: from, -"-", s, to.
        @from: character.
          @to: character.
   -character: -'"', dchar, -'"', s;
               -"'", schar, -"'", s;
               "#", hex, s. {NOTE(review): "#" is unmarked here, unlike in encoded; presumably so that it is kept in from/to values. Confirm.}
        class: code, s.
        @code: capital, letter?.
     -capital: ["A"-"Z"].
      -letter: ["a"-"z"].

Test case: ixml

Repository URI: …/tests/ixml/test-catalog.xml

Created 16 Dec 2021 by SP

Updated 30 Dec 2021 by MSM

Stripped non-significant whitespace from the expected result.

Input string (2011 characters)

         ixml: s, rule+s, s.

           -s: (whitespace; comment)*.
  -whitespace: -[Zs]; tab; lf; cr.
         -tab: -#9.
          -lf: -#a.
          -cr: -#d.
      comment: -"{", (cchar; comment)*, -"}".
       -cchar: ~["{}"].

         rule: (mark, s)?, name, s, -["=:"], s, -alts, -".".
        @mark: ["@^-"].
         alts: alt+(-[";|"], s).
          alt: term*(-",", s).
        -term: factor;
               option;
               repeat0;
               repeat1.
      -factor: terminal;
               nonterminal;
               -"(", s, alts, -")", s.
      repeat0: factor, -"*", s, sep?.
      repeat1: factor, -"+", s, sep?.
       option: factor, -"?", s.
          sep: factor.
  nonterminal: (mark, s)?, name, s.

    -terminal: literal; 
               charset.
      literal: quoted;
               encoded.
      -quoted: (tmark, s)?, -string.

        @name: namestart, namefollower*.
   -namestart: ["_"; L].
-namefollower: namestart; ["-.·‿⁀"; Nd; Mn].

       @tmark: ["^-"].
       string: -'"', dstring, -'"', s;
               -"'", sstring, -"'", s.
     @dstring: dchar+.
     @sstring: schar+.
        dchar: ~['"'; #a; #d];
               '"', -'"'. {all characters except line breaks; quotes must be doubled}
        schar: ~["'"; #a; #d];
               "'", -"'". {all characters except line breaks; quotes must be doubled}
     -encoded: (tmark, s)?, -"#", @hex, s.
          hex: ["0"-"9"; "a"-"f"; "A"-"F"]+.

     -charset: inclusion; 
               exclusion.
    inclusion: (tmark, s)?,          set.
    exclusion: (tmark, s)?, -"~", s, set.
         -set: -"[", s,  member*(-[";|"], s), -"]", s.
      -member: literal;
               range;
               class.
        range: from, -"-", s, to.
        @from: character.
          @to: character.
   -character: -'"', dchar, -'"', s;
               -"'", schar, -"'", s;
               "#", hex, s.
        class: code, s.
        @code: capital, letter?.
     -capital: ["A"-"Z"].
      -letter: ["a"-"z"].

Expected result

<ixml>
   <rule name="ixml">
      <alt>
         <nonterminal name="s"/>
         <repeat1>
            <nonterminal name="rule"/>
            <sep>
               <nonterminal name="s"/>
            </sep>
         </repeat1>
         <nonterminal name="s"/>
      </alt>
   </rule>
   <rule mark="-" name="s">
      <alt>
         <repeat0>
            <alts>
               <alt>
                  <nonterminal name="whitespace"/>
               </alt>
               <alt>
                  <nonterminal name="comment"/>
               </alt>
            </alts>
         </repeat0>
      </alt>
   </rule>
   <rule mark="-" name="whitespace">
      <alt>
         <inclusion tmark="-">
            <class code="Zs"/>
         </inclusion>
      </alt>
      <alt>
         <nonterminal name="tab"/>
      </alt>
      <alt>
         <nonterminal name="lf"/>
      </alt>
      <alt>
         <nonterminal name="cr"/>
      </alt>
   </rule>
   <rule mark="-" name="tab">
      <alt>
         <literal tmark="-" hex="9"/>
      </alt>
   </rule>
   <rule mark="-" name="lf">
      <alt>
         <literal tmark="-" hex="a"/>
      </alt>
   </rule>
   <rule mark="-" name="cr">
      <alt>
         <literal tmark="-" hex="d"/>
      </alt>
   </rule>
   <rule name="comment">
      <alt>
         <literal tmark="-" dstring="{"/>
         <repeat0>
            <alts>
               <alt>
                  <nonterminal name="cchar"/>
               </alt>
               <alt>
                  <nonterminal name="comment"/>
               </alt>
            </alts>
         </repeat0>
         <literal tmark="-" dstring="}"/>
      </alt>
   </rule>
   <rule mark="-" name="cchar">
      <alt>
         <exclusion>
            <literal dstring="{}"/>
         </exclusion>
      </alt>
   </rule>
   <rule name="rule">
      <alt>
         <option>
            <alts>
               <alt>
                  <nonterminal name="mark"/>
                  <nonterminal name="s"/>
               </alt>
            </alts>
         </option>
         <nonterminal name="name"/>
         <nonterminal name="s"/>
         <inclusion tmark="-">
            <literal dstring="=:"/>
         </inclusion>
         <nonterminal name="s"/>
         <nonterminal mark="-" name="alts"/>
         <literal tmark="-" dstring="."/>
      </alt>
   </rule>
   <rule mark="@" name="mark">
      <alt>
         <inclusion>
            <literal dstring="@^-"/>
         </inclusion>
      </alt>
   </rule>
   <rule name="alts">
      <alt>
         <repeat1>
            <nonterminal name="alt"/>
            <sep>
               <alts>
                  <alt>
                     <inclusion tmark="-">
                        <literal dstring=";|"/>
                     </inclusion>
                     <nonterminal name="s"/>
                  </alt>
               </alts>
            </sep>
         </repeat1>
      </alt>
   </rule>
   <rule name="alt">
      <alt>
         <repeat0>
            <nonterminal name="term"/>
            <sep>
               <alts>
                  <alt>
                     <literal tmark="-" dstring=","/>
                     <nonterminal name="s"/>
                  </alt>
               </alts>
            </sep>
         </repeat0>
      </alt>
   </rule>
   <rule mark="-" name="term">
      <alt>
         <nonterminal name="factor"/>
      </alt>
      <alt>
         <nonterminal name="option"/>
      </alt>
      <alt>
         <nonterminal name="repeat0"/>
      </alt>
      <alt>
         <nonterminal name="repeat1"/>
      </alt>
   </rule>
   <rule mark="-" name="factor">
      <alt>
         <nonterminal name="terminal"/>
      </alt>
      <alt>
         <nonterminal name="nonterminal"/>
      </alt>
      <alt>
         <literal tmark="-" dstring="("/>
         <nonterminal name="s"/>
         <nonterminal name="alts"/>
         <literal tmark="-" dstring=")"/>
         <nonterminal name="s"/>
      </alt>
   </rule>
   <rule name="repeat0">
      <alt>
         <nonterminal name="factor"/>
         <literal tmark="-" dstring="*"/>
         <nonterminal name="s"/>
         <option>
            <nonterminal name="sep"/>
         </option>
      </alt>
   </rule>
   <rule name="repeat1">
      <alt>
         <nonterminal name="factor"/>
         <literal tmark="-" dstring="+"/>
         <nonterminal name="s"/>
         <option>
            <nonterminal name="sep"/>
         </option>
      </alt>
   </rule>
   <rule name="option">
      <alt>
         <nonterminal name="factor"/>
         <literal tmark="-" dstring="?"/>
         <nonterminal name="s"/>
      </alt>
   </rule>
   <rule name="sep">
      <alt>
         <nonterminal name="factor"/>
      </alt>
   </rule>
   <rule name="nonterminal">
      <alt>
         <option>
            <alts>
               <alt>
                  <nonterminal name="mark"/>
                  <nonterminal name="s"/>
               </alt>
            </alts>
         </option>
         <nonterminal name="name"/>
         <nonterminal name="s"/>
      </alt>
   </rule>
   <rule mark="-" name="terminal">
      <alt>
         <nonterminal name="literal"/>
      </alt>
      <alt>
         <nonterminal name="charset"/>
      </alt>
   </rule>
   <rule name="literal">
      <alt>
         <nonterminal name="quoted"/>
      </alt>
      <alt>
         <nonterminal name="encoded"/>
      </alt>
   </rule>
   <rule mark="-" name="quoted">
      <alt>
         <option>
            <alts>
               <alt>
                  <nonterminal name="tmark"/>
                  <nonterminal name="s"/>
               </alt>
            </alts>
         </option>
         <nonterminal mark="-" name="string"/>
      </alt>
   </rule>
   <rule mark="@" name="name">
      <alt>
         <nonterminal name="namestart"/>
         <repeat0>
            <nonterminal name="namefollower"/>
         </repeat0>
      </alt>
   </rule>
   <rule mark="-" name="namestart">
      <alt>
         <inclusion>
            <literal dstring="_"/>
            <class code="L"/>
         </inclusion>
      </alt>
   </rule>
   <rule mark="-" name="namefollower">
      <alt>
         <nonterminal name="namestart"/>
      </alt>
      <alt>
         <inclusion>
            <literal dstring="-.·‿⁀"/>
            <class code="Nd"/>
            <class code="Mn"/>
         </inclusion>
      </alt>
   </rule>
   <rule mark="@" name="tmark">
      <alt>
         <inclusion>
            <literal dstring="^-"/>
         </inclusion>
      </alt>
   </rule>
   <rule name="string">
      <alt>
         <literal tmark="-" sstring="&#34;"/>
         <nonterminal name="dstring"/>
         <literal tmark="-" sstring="&#34;"/>
         <nonterminal name="s"/>
      </alt>
      <alt>
         <literal tmark="-" dstring="'"/>
         <nonterminal name="sstring"/>
         <literal tmark="-" dstring="'"/>
         <nonterminal name="s"/>
      </alt>
   </rule>
   <rule mark="@" name="dstring">
      <alt>
         <repeat1>
            <nonterminal name="dchar"/>
         </repeat1>
      </alt>
   </rule>
   <rule mark="@" name="sstring">
      <alt>
         <repeat1>
            <nonterminal name="schar"/>
         </repeat1>
      </alt>
   </rule>
   <rule name="dchar">
      <alt>
         <exclusion>
            <literal sstring="&#34;"/>
            <literal hex="a"/>
            <literal hex="d"/>
         </exclusion>
      </alt>
      <alt>
         <literal sstring="&#34;"/>
         <literal tmark="-" sstring="&#34;"/>
      </alt>
   </rule>
   <comment>all characters except line breaks; quotes must be doubled</comment>
   <rule name="schar">
      <alt>
         <exclusion>
            <literal dstring="'"/>
            <literal hex="a"/>
            <literal hex="d"/>
         </exclusion>
      </alt>
      <alt>
         <literal dstring="'"/>
         <literal tmark="-" dstring="'"/>
      </alt>
   </rule>
   <comment>all characters except line breaks; quotes must be doubled</comment>
   <rule mark="-" name="encoded">
      <alt>
         <option>
            <alts>
               <alt>
                  <nonterminal name="tmark"/>
                  <nonterminal name="s"/>
               </alt>
            </alts>
         </option>
         <literal tmark="-" dstring="#"/>
         <nonterminal mark="@" name="hex"/>
         <nonterminal name="s"/>
      </alt>
   </rule>
   <rule name="hex">
      <alt>
         <repeat1>
            <inclusion>
               <range from="0" to="9"/>
               <range from="a" to="f"/>
               <range from="A" to="F"/>
            </inclusion>
         </repeat1>
      </alt>
   </rule>
   <rule mark="-" name="charset">
      <alt>
         <nonterminal name="inclusion"/>
      </alt>
      <alt>
         <nonterminal name="exclusion"/>
      </alt>
   </rule>
   <rule name="inclusion">
      <alt>
         <option>
            <alts>
               <alt>
                  <nonterminal name="tmark"/>
                  <nonterminal name="s"/>
               </alt>
            </alts>
         </option>
         <nonterminal name="set"/>
      </alt>
   </rule>
   <rule name="exclusion">
      <alt>
         <option>
            <alts>
               <alt>
                  <nonterminal name="tmark"/>
                  <nonterminal name="s"/>
               </alt>
            </alts>
         </option>
         <literal tmark="-" dstring="~"/>
         <nonterminal name="s"/>
         <nonterminal name="set"/>
      </alt>
   </rule>
   <rule mark="-" name="set">
      <alt>
         <literal tmark="-" dstring="["/>
         <nonterminal name="s"/>
         <repeat0>
            <nonterminal name="member"/>
            <sep>
               <alts>
                  <alt>
                     <inclusion tmark="-">
                        <literal dstring=";|"/>
                     </inclusion>
                     <nonterminal name="s"/>
                  </alt>
               </alts>
            </sep>
         </repeat0>
         <literal tmark="-" dstring="]"/>
         <nonterminal name="s"/>
      </alt>
   </rule>
   <rule mark="-" name="member">
      <alt>
         <nonterminal name="literal"/>
      </alt>
      <alt>
         <nonterminal name="range"/>
      </alt>
      <alt>
         <nonterminal name="class"/>
      </alt>
   </rule>
   <rule name="range">
      <alt>
         <nonterminal name="from"/>
         <literal tmark="-" dstring="-"/>
         <nonterminal name="s"/>
         <nonterminal name="to"/>
      </alt>
   </rule>
   <rule mark="@" name="from">
      <alt>
         <nonterminal name="character"/>
      </alt>
   </rule>
   <rule mark="@" name="to">
      <alt>
         <nonterminal name="character"/>
      </alt>
   </rule>
   <rule mark="-" name="character">
      <alt>
         <literal tmark="-" sstring="&#34;"/>
         <nonterminal name="dchar"/>
         <literal tmark="-" sstring="&#34;"/>
         <nonterminal name="s"/>
      </alt>
      <alt>
         <literal tmark="-" dstring="'"/>
         <nonterminal name="schar"/>
         <literal tmark="-" dstring="'"/>
         <nonterminal name="s"/>
      </alt>
      <alt>
         <literal dstring="#"/>
         <nonterminal name="hex"/>
         <nonterminal name="s"/>
      </alt>
   </rule>
   <rule name="class">
      <alt>
         <nonterminal name="code"/>
         <nonterminal name="s"/>
      </alt>
   </rule>
   <rule mark="@" name="code">
      <alt>
         <nonterminal name="capital"/>
         <option>
            <nonterminal name="letter"/>
         </option>
      </alt>
   </rule>
   <rule mark="-" name="capital">
      <alt>
         <inclusion>
            <range from="A" to="Z"/>
         </inclusion>
      </alt>
   </rule>
   <rule mark="-" name="letter">
      <alt>
         <inclusion>
            <range from="a" to="z"/>
         </inclusion>
      </alt>
   </rule>
</ixml>