4. The TIGER-XML schema

The TIGER-XML format is validated against an XML schema. XML schema validation is supported by all major XML parsers. The schema is divided into three parts: the main schema, the subschema for the corpus header, and the subschema for subcorpora. The TIGER-XML schema and its two subschemas are placed in the schema/ subdirectory of your TIGERSearch installation.

Part 1: Main schema - TigerXML.xsd

<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">

 <!-- ==================================================================
      XML Schema for the TIGER-XML format
      http://www.ims.uni-stuttgart.de/projekte/TIGER/public/TigerXML.xsd
      ==================================================================
      TIGER Project, Wolfgang Lezius
      IMS, University of Stuttgart, 04/01/2003
      ==================================================================
      Namespace note: every schema component carries the xsd: prefix,
      which is bound on the root element. Neither a default namespace
      nor a target namespace is declared, so unprefixed type references
      (headType, bodyType, idType, ...) resolve to the no-namespace
      components of this schema and of the two included subschemas.
      ================================================================== -->


  <!-- ======================================================
       INCLUDES DECLARATION OF THE HEADER
       (provides headType)
       ====================================================== -->
  <xsd:include schemaLocation="TigerXMLHeader.xsd"/>


  <!-- ======================================================
       INCLUDES DECLARATION OF SUBCORPORA AND SENTENCES
       (provides subcorpusType, sentenceType and idType)
       ====================================================== -->
  <xsd:include schemaLocation="TigerXMLSubcorpus.xsd"/>


  <!-- ======================================================
       DECLARATION OF THE CORPUS DOCUMENT
       ====================================================== -->

  <!-- declaration of the root element: corpus

       Content: an optional head followed by exactly one body. -->

  <xsd:element name="corpus">

    <xsd:complexType>

      <xsd:sequence>

        <!-- header of the document is optional -->
        <xsd:element name="head" type="headType" minOccurs="0" maxOccurs="1"/>

        <!-- the body is mandatory -->
        <xsd:element name="body" type="bodyType" minOccurs="1" maxOccurs="1"/>

      </xsd:sequence>

      <!-- corpus ID -->
      <xsd:attribute name="id" type="idType" use="required"/>

      <!-- optional attribute: TigerXML version; used by TIGERSearch only -->
      <xsd:attribute name="version" type="xsd:string" use="optional"/>

    </xsd:complexType>

  </xsd:element>


  <!-- declaration of the body type

       The body holds one or more children; each child is either a
       subcorpus or a sentence (s), in any order. -->

  <xsd:complexType name="bodyType">

    <xsd:choice minOccurs="1" maxOccurs="unbounded">
      <xsd:element name="subcorpus" type="subcorpusType" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="s" type="sentenceType" minOccurs="1" maxOccurs="1"/>
    </xsd:choice>

  </xsd:complexType>


</xsd:schema>

Part 2: Subschema for the corpus header - TigerXMLHeader.xsd

<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">

 <!-- ========================================================================
      XML SubSchema for the header part of the TIGER-XML format
      http://www.ims.uni-stuttgart.de/projekte/TIGER/public/TigerXMLHeader.xsd
      ========================================================================
      TIGER Project, Wolfgang Lezius
      IMS, University of Stuttgart, 04/01/2003
      ========================================================================
      Namespace note: every schema component carries the xsd: prefix,
      which is bound on the root element. Neither a default namespace
      nor a target namespace is declared, so unprefixed type references
      (metaType, featureType, ...) resolve to the no-namespace
      components declared in this file.
      ======================================================================== -->


  <!-- ======================================================
       DECLARATION OF THE HEADER
       ====================================================== -->


  <!-- declaration of the head element -->

  <xsd:element name="head" type="headType"/>


  <!-- declaration of the header type

       Content: optional meta information followed by an optional
       annotation declaration. -->

  <xsd:complexType name="headType">

     <xsd:sequence>
        <xsd:element name="meta" type="metaType" minOccurs="0" maxOccurs="1"/>
        <xsd:element name="annotation" type="annotationType" minOccurs="0" maxOccurs="1"/>
     </xsd:sequence>

     <!-- optional: reference to external header file

          The header of a TigerXML corpus can also be stored in a separate
          file. This attribute points to the external header file. The
          pointer is a URI. Examples: file:relative.xml or
          file:/path/to/absolute.xml

          Note: If there is a pointer to an external file, the head
                element must be empty. This rule is a convention; the
                content model above does not enforce it. -->

     <xsd:attribute name="external" type="xsd:anyURI"/>

  </xsd:complexType>


  <!-- declaration of the meta information type

       All entries are optional plain strings, but those that are
       present must appear in the order given below. -->

  <xsd:complexType name="metaType">

    <xsd:sequence>
      <xsd:element name="name" type="xsd:string" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="author" type="xsd:string" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="date" type="xsd:string" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="description" type="xsd:string" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="format" type="xsd:string" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="history" type="xsd:string" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>

  </xsd:complexType>


  <!-- declaration of the annotation type

       Content: at least one feature declaration, followed by an
       optional edge label declaration and an optional secondary
       edge label declaration. -->

  <xsd:complexType name="annotationType">

    <xsd:sequence>
      <xsd:element name="feature" type="featureType" minOccurs="1" maxOccurs="unbounded"/>
      <xsd:element name="edgelabel" type="edgelabelType" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="secedgelabel" type="edgelabelType" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>

  </xsd:complexType>


  <!-- declaration of the feature type

       A feature has a required name, a required domain and any
       number of declared values (possibly none). -->

  <xsd:complexType name="featureType">

    <xsd:sequence>
       <xsd:element name="value" type="featurevalueType" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>

    <xsd:attribute name="name" type="featurenameType" use="required"/>

    <xsd:attribute name="domain" use="required">
       <xsd:simpleType>
         <xsd:restriction base="xsd:string">
           <xsd:enumeration value="T"/>     <!-- feature for terminal nodes -->
           <xsd:enumeration value="NT"/>    <!-- feature for nonterminal nodes -->
           <xsd:enumeration value="FREC"/>  <!-- feature for both -->
         </xsd:restriction>
       </xsd:simpleType>
    </xsd:attribute>

  </xsd:complexType>


  <!-- declaration of the (secondary) edge label type

       Used for both the edgelabel and the secedgelabel element;
       lists any number of label values. -->

  <xsd:complexType name="edgelabelType">

    <xsd:sequence>
       <xsd:element name="value" type="featurevalueType" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>

  </xsd:complexType>


  <!-- declaration of the feature value type

       The name attribute holds the value itself; the element text
       holds its documentation. -->

  <xsd:complexType name="featurevalueType">

    <xsd:simpleContent>   <!-- element content: documentation of the feature value -->
      <xsd:extension base="xsd:string">
        <xsd:attribute name="name" type="xsd:string"/>
      </xsd:extension>
    </xsd:simpleContent>

  </xsd:complexType>


  <!-- ======================================================
       HEADER DECLARATIONS THAT SHOULD BE REFINED
       ====================================================== -->

  <!-- declaration of the FEATURE NAMES used in the corpus header;
       this type only constrains the length of a name (1 to 20
       characters, whitespace preserved) and should be refined by a
       specialised, corpus-dependent schema -->

  <xsd:simpleType name="featurenameType">

    <xsd:restriction base="xsd:string">
      <xsd:minLength value="1"/>
      <xsd:maxLength value="20"/>
      <xsd:whiteSpace value="preserve"/>
    </xsd:restriction>

  </xsd:simpleType>


</xsd:schema>

Part 3: Subschema for subcorpora - TigerXMLSubcorpus.xsd

<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">

 <!-- ===========================================================================
      XML Schema for the subcorpus part of the TIGER-XML format
      http://www.ims.uni-stuttgart.de/projekte/TIGER/public/TigerXMLSubcorpus.xsd
      ===========================================================================
      TIGER Project, Wolfgang Lezius
      IMS, University of Stuttgart, 04/01/2003
      ===========================================================================
      Namespace note: every schema component carries the xsd: prefix,
      which is bound on the root element. Neither a default namespace
      nor a target namespace is declared, so unprefixed type and group
      references (sentenceType, idType, tfeatureAttributes, ...) resolve
      to the no-namespace components declared in this file.
      =========================================================================== -->

  <!-- ======================================================
       DECLARATION OF SUBCORPORA AND SENTENCES
       ====================================================== -->


  <!-- declaration of the subcorpus element -->

  <xsd:element name="subcorpus" type="subcorpusType"/>


  <!-- declaration of the subcorpus type -->

  <xsd:complexType name="subcorpusType">

    <!-- A subcorpus may comprise other subcorpora or sentences,
         in any order; it may also be empty (see "external" below) -->

    <xsd:choice minOccurs="0" maxOccurs="unbounded">
      <xsd:element name="subcorpus" type="subcorpusType" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="s" type="sentenceType" minOccurs="1" maxOccurs="1"/>
    </xsd:choice>

    <!-- required: subcorpus name -->

    <xsd:attribute name="name" type="xsd:string" use="required"/>

    <!-- optional: reference to external subcorpus file

         A subcorpus of a TigerXML corpus can also be stored in a separate
         file. This attribute points to the external subcorpus file. The
         pointer is a URI. Examples: file:relative.xml or
         file:/path/to/absolute.xml

         Note: If there is a pointer to an external file, the subcorpus
               element must be empty. This rule is a convention; the
               content model above does not enforce it. -->

    <xsd:attribute name="external" type="xsd:anyURI"/>

  </xsd:complexType>


  <!-- declaration of the sentence type

       A sentence has a required id and holds an optional graph
       followed by an optional matches element. -->

  <xsd:complexType name="sentenceType">

    <xsd:sequence>
      <xsd:element name="graph" type="graphType" minOccurs="0" maxOccurs="1"/>
      <xsd:element name="matches" type="matchesType" minOccurs="0" maxOccurs="1"/>
    </xsd:sequence>

    <xsd:attribute name="id" type="idType" use="required"/>

  </xsd:complexType>


  <!-- declaration of the graph type

       A graph consists of exactly one terminals element followed by
       exactly one nonterminals element. -->

  <xsd:complexType name="graphType">

    <xsd:sequence>
      <xsd:element name="terminals" type="terminalsType" minOccurs="1" maxOccurs="1"/>
      <xsd:element name="nonterminals" type="nonterminalsType" minOccurs="1" maxOccurs="1"/>
    </xsd:sequence>

    <!-- required: reference to the root node of the graph -->
    <xsd:attribute name="root" type="idrefType" use="required"/>

    <!-- indicates that the exported sentence is discontinuous;
         defaults to false when the attribute is absent -->
    <xsd:attribute name="discontinuous" type="xsd:boolean" default="false" use="optional"/>

  </xsd:complexType>


  <!-- declaration of the terminals type: one or more t elements -->

  <xsd:complexType name="terminalsType">

    <xsd:sequence>
      <xsd:element name="t" type="tType" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>

  </xsd:complexType>


  <!-- declaration of the t element -->

  <xsd:complexType name="tType">

    <!-- secondary edges possible -->
    <xsd:sequence>
      <xsd:element name="secedge" type="secedgeType" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>

    <xsd:attribute name="id" type="idType" use="required"/>
    <!-- feature attributes of the terminal node -->
    <xsd:attributeGroup ref="tfeatureAttributes"/>

  </xsd:complexType>


  <!-- declaration of the nonterminals type: any number of nt
       elements, possibly none -->

  <xsd:complexType name="nonterminalsType">

    <xsd:sequence>
      <xsd:element name="nt" type="ntType" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>

  </xsd:complexType>


  <!-- declaration of the nt element -->

  <xsd:complexType name="ntType">

    <!-- edges and secondary edges possible; all edge elements
         must precede the secedge elements -->
    <xsd:sequence>
      <xsd:element name="edge" type="edgeType" minOccurs="0" maxOccurs="unbounded"/>
      <xsd:element name="secedge" type="secedgeType" minOccurs="0" maxOccurs="unbounded"/>
    </xsd:sequence>

    <xsd:attribute name="id" type="idType" use="required"/>
    <!-- feature attributes of the nonterminal node -->
    <xsd:attributeGroup ref="ntfeatureAttributes"/>

  </xsd:complexType>


  <!-- declaration of the edge type: an empty element that
       references its target node and may carry a label -->

  <xsd:complexType name="edgeType">

    <xsd:attribute name="idref" type="idrefType" use="required"/>

    <xsd:attributeGroup ref="edgelabelAttribute"/>

  </xsd:complexType>


  <!-- declaration of the secondary edge type: an empty element
       that references its target node and may carry a label -->

  <xsd:complexType name="secedgeType">

    <xsd:attribute name="idref" type="idrefType" use="required"/>

    <xsd:attributeGroup ref="secedgelabelAttribute"/>

  </xsd:complexType>


  <!-- declaration of the matches type: one or more match elements -->

  <xsd:complexType name="matchesType">

    <xsd:sequence>
      <xsd:element name="match" type="matchType" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>

  </xsd:complexType>


  <!-- declaration of the match type: a required subgraph
       reference plus one or more variable bindings -->

  <xsd:complexType name="matchType">

    <xsd:sequence>
      <xsd:element name="variable" type="varType" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>

    <xsd:attribute name="subgraph" type="idrefType" use="required"/>

  </xsd:complexType>


  <!-- declaration of the variable type: an empty element that
       pairs a variable name with a node reference -->

  <xsd:complexType name="varType">

    <xsd:attribute name="name" type="xsd:string" use="required"/>

    <xsd:attribute name="idref" type="idrefType" use="required"/>

  </xsd:complexType>


  <!-- ======================================================
       SENTENCE DECLARATIONS THAT SHOULD BE REFINED
       ====================================================== -->

  <!-- declaration of the TERMINAL FEATURE ATTRIBUTES;
       this group is unrestricted (any attribute is accepted and
       not validated), but should be refined by a specialised,
       corpus-dependent schema -->

  <xsd:attributeGroup name="tfeatureAttributes">

    <xsd:anyAttribute processContents="skip"/>

  </xsd:attributeGroup>


  <!-- declaration of the NONTERMINAL FEATURE ATTRIBUTES;
       this group is unrestricted (any attribute is accepted and
       not validated), but should be refined by a specialised,
       corpus-dependent schema -->

  <xsd:attributeGroup name="ntfeatureAttributes">

    <xsd:anyAttribute processContents="skip"/>

  </xsd:attributeGroup>


  <!-- declaration of the EDGE-LABEL ATTRIBUTE;
       the label attribute is optional, which should be refined by a
       specialised, corpus-dependent schema -->

  <xsd:attributeGroup name="edgelabelAttribute">

    <xsd:attribute name="label" type="xsd:string" use="optional"/>

  </xsd:attributeGroup>


  <!-- declaration of the SECONDARY-EDGE-LABEL ATTRIBUTE;
       the label attribute is optional, which should be refined by a
       specialised, corpus-dependent schema -->

  <xsd:attributeGroup name="secedgelabelAttribute">

    <xsd:attribute name="label" type="xsd:string" use="optional"/>

  </xsd:attributeGroup>


  <!-- ======================================================
       ID and IDREF TYPE DECLARATIONS
       ====================================================== -->

  <!-- Even though XML Schema is a W3C Recommendation, schema
       support of XML parsers is still restricted. Using some
       parsers you might have problems with the ID and IDREF
       attributes in combination with an "anyAttribute"
       declaration. In this case, just modify the base type
       of the following two declarations to "xsd:string".  -->


  <!-- declaration of idType -->

  <xsd:simpleType name="idType">

    <xsd:restriction base="xsd:ID"/>

  </xsd:simpleType>


  <!-- declaration of idrefType -->

  <xsd:simpleType name="idrefType">

    <xsd:restriction base="xsd:IDREF"/>

  </xsd:simpleType>


</xsd:schema>