iXML Community Group Test Suite

23 Oct 2023 (22 Nov 2023)

Top-level catalog for tests in the iXML Community Group Test Suite.

Tests have been contributed by several sources, but the core of the test collection consists of the tests contributed by Steven Pemberton in December 2021.

Error tests

28 Jun 2022

Tests intended to demonstrate errors that processors are required to raise.

non-XML-char-in-input-output-errors

Created 20 Jun 2022 by MSM

Follow-up to invalid-char. Tests the serialization of non-XML characters in a different way.

In this test set, the grammar accepts a range of non-XML characters and hides some of them, but not all. Input in which only 'hidden' control characters are present produces output; input in which non-hidden control characters are present should produce a dynamic error.

Invisible XML Grammar

{ Grammar for experiments with non-XML characters in input.

  Some input characters are to be shown, some to be hidden.
  We choose to show the characters with prime character
  numbers and hide those with composite numbers.

  Since we need non-XML characters in the input, and the only 
  non-XML characters we can make are in the C0 range, the 
  input and output are likely to be challenging to check.
  
  You have been warned.
}
S: (show; hide; printable)*.

NUL: -#00 { null }.
SOH: -#01 { start of header }.
STX:  #02 { start of text }.
ETX:  #03 { end of text }.
EOT: -#04 { end of transmission }.
ENQ:  #05 { enquiry ('are you there?') }.
ACK: -#06 { acknowledgement ('yes i am here') }.
BEL:  #07 { bell }.
BS:  -#08 { backspace }.
HT:  -#09 { horizontal tab, an XML character, but 
           not a prime, so it's in the 'hide' category }.
LF:  -#0A { linefeed, also an XML character }.
VT:   #0B { vertical tabulation }.
FF:  -#0C { form feed }.
CR:   #0D { carriage return (an XML character) }.
SO:  -#0E { shift out (to alternate character set }.
SI:  -#0F { shift in (from alternate character set }.
DLE: -#10 { data link escape }.
DC1:  #11 { device-control 1 / XON }.
DC2: -#12 { device-control 2 }.
DC3:  #13 { device-control 3 / XOFF }.
DC4: -#14 { device-control 4 }.
NAK: -#15 { negative acknowledgement }.
SYN: -#16 { synchronous idle }.
ETB:  #17 { end of transmission block / end of paragraph }.
CAN: -#18 { cancel }.
EM:  -#19 { end of medium / em-space / beginning of paragraph }.
SUB: -#1A { substitute (often used for end of file) }.
ESC: -#1B { escape }.
FS:  -#1C { file separator }.
GS:   #1D { group separator }.
RS:  -#1E { record separator }.
US:   #1F { unit separator }.

show: STX; ETX; ENQ; BEL; VT; CR; DC1; DC3; ETB; GS; US.
hide:  NUL; SOH; EOT; ACK; BS; HT; LF; FF; SO; SI; DLE; DC2;
        DC4; NAK; SYN; CAN; EM; SUB; ESC; FS; RS.

-printable: [#20 - #7E].

Test case: Grammar test

Repository URI: …/tests/error/test-catalog.xml

Expected result

<ixml>
   <comment> Grammar for experiments with non-XML characters in input.

  Some input characters are to be shown, some to be hidden.
  We choose to show the characters with prime character
  numbers and hide those with composite numbers.

  Since we need non-XML characters in the input, and the only 
  non-XML characters we can make are in the C0 range, the 
  input and output are likely to be challenging to check.
  
  You have been warned.
</comment>
   <rule name="S">
      <alt>
         <repeat0>
            <alts>
               <alt>
                  <nonterminal name="show"/>
               </alt>
               <alt>
                  <nonterminal name="hide"/>
               </alt>
               <alt>
                  <nonterminal name="printable"/>
               </alt>
            </alts>
         </repeat0>
      </alt>
   </rule>
   <rule name="NUL">
      <alt>
         <literal tmark="-" hex="00">
            <comment> null </comment>
         </literal>
      </alt>
   </rule>
   <rule name="SOH">
      <alt>
         <literal tmark="-" hex="01">
            <comment> start of header </comment>
         </literal>
      </alt>
   </rule>
   <rule name="STX">
      <alt>
         <literal hex="02">
            <comment> start of text </comment>
         </literal>
      </alt>
   </rule>
   <rule name="ETX">
      <alt>
         <literal hex="03">
            <comment> end of text </comment>
         </literal>
      </alt>
   </rule>
   <rule name="EOT">
      <alt>
         <literal tmark="-" hex="04">
            <comment> end of transmission </comment>
         </literal>
      </alt>
   </rule>
   <rule name="ENQ">
      <alt>
         <literal hex="05">
            <comment> enquiry ('are you there?') </comment>
         </literal>
      </alt>
   </rule>
   <rule name="ACK">
      <alt>
         <literal tmark="-" hex="06">
            <comment> acknowledgement ('yes i am here') </comment>
         </literal>
      </alt>
   </rule>
   <rule name="BEL">
      <alt>
         <literal hex="07">
            <comment> bell </comment>
         </literal>
      </alt>
   </rule>
   <rule name="BS">
      <alt>
         <literal tmark="-" hex="08">
            <comment> backspace </comment>
         </literal>
      </alt>
   </rule>
   <rule name="HT">
      <alt>
         <literal tmark="-" hex="09">
            <comment> horizontal tab, an XML character, but 
           not a prime, so it's in the 'hide' category </comment>
         </literal>
      </alt>
   </rule>
   <rule name="LF">
      <alt>
         <literal tmark="-" hex="0A">
            <comment> linefeed, also an XML character </comment>
         </literal>
      </alt>
   </rule>
   <rule name="VT">
      <alt>
         <literal hex="0B">
            <comment> vertical tabulation </comment>
         </literal>
      </alt>
   </rule>
   <rule name="FF">
      <alt>
         <literal tmark="-" hex="0C">
            <comment> form feed </comment>
         </literal>
      </alt>
   </rule>
   <rule name="CR">
      <alt>
         <literal hex="0D">
            <comment> carriage return (an XML character) </comment>
         </literal>
      </alt>
   </rule>
   <rule name="SO">
      <alt>
         <literal tmark="-" hex="0E">
            <comment> shift out (to alternate character set </comment>
         </literal>
      </alt>
   </rule>
   <rule name="SI">
      <alt>
         <literal tmark="-" hex="0F">
            <comment> shift in (from alternate character set </comment>
         </literal>
      </alt>
   </rule>
   <rule name="DLE">
      <alt>
         <literal tmark="-" hex="10">
            <comment> data link escape </comment>
         </literal>
      </alt>
   </rule>
   <rule name="DC1">
      <alt>
         <literal hex="11">
            <comment> device-control 1 / XON </comment>
         </literal>
      </alt>
   </rule>
   <rule name="DC2">
      <alt>
         <literal tmark="-" hex="12">
            <comment> device-control 2 </comment>
         </literal>
      </alt>
   </rule>
   <rule name="DC3">
      <alt>
         <literal hex="13">
            <comment> device-control 3 / XOFF </comment>
         </literal>
      </alt>
   </rule>
   <rule name="DC4">
      <alt>
         <literal tmark="-" hex="14">
            <comment> device-control 4 </comment>
         </literal>
      </alt>
   </rule>
   <rule name="NAK">
      <alt>
         <literal tmark="-" hex="15">
            <comment> negative acknowledgement </comment>
         </literal>
      </alt>
   </rule>
   <rule name="SYN">
      <alt>
         <literal tmark="-" hex="16">
            <comment> synchronous idle </comment>
         </literal>
      </alt>
   </rule>
   <rule name="ETB">
      <alt>
         <literal hex="17">
            <comment> end of transmission block / end of paragraph </comment>
         </literal>
      </alt>
   </rule>
   <rule name="CAN">
      <alt>
         <literal tmark="-" hex="18">
            <comment> cancel </comment>
         </literal>
      </alt>
   </rule>
   <rule name="EM">
      <alt>
         <literal tmark="-" hex="19">
            <comment> end of medium / em-space / beginning of paragraph </comment>
         </literal>
      </alt>
   </rule>
   <rule name="SUB">
      <alt>
         <literal tmark="-" hex="1A">
            <comment> substitute (often used for end of file) </comment>
         </literal>
      </alt>
   </rule>
   <rule name="ESC">
      <alt>
         <literal tmark="-" hex="1B">
            <comment> escape </comment>
         </literal>
      </alt>
   </rule>
   <rule name="FS">
      <alt>
         <literal tmark="-" hex="1C">
            <comment> file separator </comment>
         </literal>
      </alt>
   </rule>
   <rule name="GS">
      <alt>
         <literal hex="1D">
            <comment> group separator </comment>
         </literal>
      </alt>
   </rule>
   <rule name="RS">
      <alt>
         <literal tmark="-" hex="1E">
            <comment> record separator </comment>
         </literal>
      </alt>
   </rule>
   <rule name="US">
      <alt>
         <literal hex="1F">
            <comment> unit separator </comment>
         </literal>
      </alt>
   </rule>
   <rule name="show">
      <alt>
         <nonterminal name="STX"/>
      </alt>
      <alt>
         <nonterminal name="ETX"/>
      </alt>
      <alt>
         <nonterminal name="ENQ"/>
      </alt>
      <alt>
         <nonterminal name="BEL"/>
      </alt>
      <alt>
         <nonterminal name="VT"/>
      </alt>
      <alt>
         <nonterminal name="CR"/>
      </alt>
      <alt>
         <nonterminal name="DC1"/>
      </alt>
      <alt>
         <nonterminal name="DC3"/>
      </alt>
      <alt>
         <nonterminal name="ETB"/>
      </alt>
      <alt>
         <nonterminal name="GS"/>
      </alt>
      <alt>
         <nonterminal name="US"/>
      </alt>
   </rule>
   <rule name="hide">
      <alt>
         <nonterminal name="NUL"/>
      </alt>
      <alt>
         <nonterminal name="SOH"/>
      </alt>
      <alt>
         <nonterminal name="EOT"/>
      </alt>
      <alt>
         <nonterminal name="ACK"/>
      </alt>
      <alt>
         <nonterminal name="BS"/>
      </alt>
      <alt>
         <nonterminal name="HT"/>
      </alt>
      <alt>
         <nonterminal name="LF"/>
      </alt>
      <alt>
         <nonterminal name="FF"/>
      </alt>
      <alt>
         <nonterminal name="SO"/>
      </alt>
      <alt>
         <nonterminal name="SI"/>
      </alt>
      <alt>
         <nonterminal name="DLE"/>
      </alt>
      <alt>
         <nonterminal name="DC2"/>
      </alt>
      <alt>
         <nonterminal name="DC4"/>
      </alt>
      <alt>
         <nonterminal name="NAK"/>
      </alt>
      <alt>
         <nonterminal name="SYN"/>
      </alt>
      <alt>
         <nonterminal name="CAN"/>
      </alt>
      <alt>
         <nonterminal name="EM"/>
      </alt>
      <alt>
         <nonterminal name="SUB"/>
      </alt>
      <alt>
         <nonterminal name="ESC"/>
      </alt>
      <alt>
         <nonterminal name="FS"/>
      </alt>
      <alt>
         <nonterminal name="RS"/>
      </alt>
   </rule>
   <rule mark="-" name="printable">
      <alt>
         <inclusion>
            <member from="#20" to="#7E"/>
         </inclusion>
      </alt>
   </rule>
</ixml>