public class UniSylIOUtils
extends java.lang.Object
Header Property Keys
Key | Description |
---|---|
columns | column format specification, see below |
separator | regular expression used to split columns in the data |
documentBegin | line prefix that indicates beginning of a new document |
documentEnd | line prefix that indicates the end of the current document |
sentenceBegin | line prefix that indicates the beginning of a new sentence |
sentenceEnd | line prefix that indicates the end of the current sentence |
Column Properties
Position | Property | Description |
---|---|---|
1 | key | key that should be used to save the content of the column on the respective member |
2 | type | string form of the column's type |
3 | level | level within the document set to which the content of the column is to be assigned |
(4) | role | optional hint on whether the content of the column is meant to be used as a delimiter or whether it contains aggregated data |
(5) | separator | optional info on what separator is to be used in case the column is marked to contain aggregated data (the default separator is the pipe character '\|') |
Constants
Modifier and Type | Class and Description |
---|---|
static class |
UniSylIOUtils.AnnotationLevel |
static class |
UniSylIOUtils.Column |
static class |
UniSylIOUtils.ColumnType |
static class |
UniSylIOUtils.ComparingDelimiter |
static class |
UniSylIOUtils.CompoundDelimiter |
static interface |
UniSylIOUtils.Delimiter |
static class |
UniSylIOUtils.EmptyLineDelimiter |
static class |
UniSylIOUtils.LinePrefixDelimiter |
static class |
UniSylIOUtils.PropertyDelimiter |
static class |
UniSylIOUtils.UniSylConfig |
Modifier and Type | Field and Description |
---|---|
static int |
BEGIN_ELEMENT |
static java.lang.String |
CONST_NEWLINE |
static java.lang.String |
CONST_NEWLINES |
static java.lang.String |
CONST_SPACE |
static java.lang.String |
CONST_TAB |
static java.lang.String |
CONST_WHITESPACE |
static int |
END_ELEMENT |
static int |
IGNORE_NEWLINES |
static java.lang.String |
KEY_ACCENT_EXCURSION |
static java.lang.String |
KEY_ADJUST_DEPENDENCY_HEADS |
static java.lang.String |
KEY_COLUMNS |
static java.lang.String |
KEY_COREF_PROPERTY_KEY |
static java.lang.String |
KEY_CREATE_COREF_STRUCTURE |
static java.lang.String |
KEY_DECODE_FESTIVAL_UMLAUTS |
static java.lang.String |
KEY_DOCUMENT_BEGIN |
static java.lang.String |
KEY_DOCUMENT_END |
static java.lang.String |
KEY_DOCUMENT_ID_FORMAT |
static java.lang.String |
KEY_EMPTY_CONTENT |
static java.lang.String |
KEY_IGNORE_COLUMN_COUNT_MISMATCH |
static java.lang.String |
KEY_LOCAL_SAMPA_RULES_FILE |
static java.lang.String |
KEY_MARK_ACCENT_ON_WORDS |
static java.lang.String |
KEY_ONLY_CONSIDER_STRESSED_SYLLABLES |
static java.lang.String |
KEY_SCHEME |
static java.lang.String |
KEY_SENTENCE_BEGIN |
static java.lang.String |
KEY_SENTENCE_END |
static java.lang.String |
KEY_SEPARATOR |
static java.lang.String |
KEY_SKIP_EMPTY_LINES |
static java.lang.String |
KEY_SYLLABLE_OFFSETS_FROM_SAMPA |
static java.lang.String |
KEY_WORD_BEGIN |
static java.lang.String |
KEY_WORD_END |
static java.lang.String |
ROLE_AGGREGATE |
static java.lang.String |
ROLE_DELIMITER |
static int |
SAME_ELEMENT |
Constructor and Description |
---|
UniSylIOUtils() |
Modifier and Type | Method and Description |
---|---|
static boolean |
isBeginElement(int delimiterResult) |
static boolean |
isEndElement(int delimiterResult) |
static boolean |
isIgnoreNewlines(int delimiterResult) |
static boolean |
isSameElement(int delimiterResult) |
static UniSylIOUtils.UniSylConfig |
readConfig(de.ims.icarus.util.strings.CharLineBuffer buffer)
Reads in the header area of a UniSyl file and places the line cursor at the
first line after the header.
|
static java.lang.String |
resolveConstant(java.lang.String s) |
public static final java.lang.String KEY_COLUMNS
public static final java.lang.String KEY_DOCUMENT_BEGIN
public static final java.lang.String KEY_DOCUMENT_END
public static final java.lang.String KEY_SENTENCE_BEGIN
public static final java.lang.String KEY_SENTENCE_END
public static final java.lang.String KEY_WORD_BEGIN
public static final java.lang.String KEY_WORD_END
public static final java.lang.String KEY_SEPARATOR
public static final java.lang.String KEY_SCHEME
public static final java.lang.String KEY_SKIP_EMPTY_LINES
public static final java.lang.String KEY_SYLLABLE_OFFSETS_FROM_SAMPA
public static final java.lang.String KEY_LOCAL_SAMPA_RULES_FILE
public static final java.lang.String KEY_MARK_ACCENT_ON_WORDS
public static final java.lang.String KEY_ONLY_CONSIDER_STRESSED_SYLLABLES
public static final java.lang.String KEY_ACCENT_EXCURSION
public static final java.lang.String KEY_EMPTY_CONTENT
public static final java.lang.String KEY_ADJUST_DEPENDENCY_HEADS
public static final java.lang.String KEY_CREATE_COREF_STRUCTURE
public static final java.lang.String KEY_COREF_PROPERTY_KEY
public static final java.lang.String KEY_DECODE_FESTIVAL_UMLAUTS
public static final java.lang.String KEY_IGNORE_COLUMN_COUNT_MISMATCH
public static final java.lang.String KEY_DOCUMENT_ID_FORMAT
public static final java.lang.String ROLE_DELIMITER
public static final java.lang.String ROLE_AGGREGATE
public static final java.lang.String CONST_TAB
public static final java.lang.String CONST_SPACE
public static final java.lang.String CONST_NEWLINE
public static final java.lang.String CONST_NEWLINES
public static final java.lang.String CONST_WHITESPACE
public static final int BEGIN_ELEMENT
public static final int END_ELEMENT
public static final int SAME_ELEMENT
public static final int IGNORE_NEWLINES
public static java.lang.String resolveConstant(java.lang.String s)
public static UniSylIOUtils.UniSylConfig readConfig(de.ims.icarus.util.strings.CharLineBuffer buffer) throws java.io.IOException
Parameters: buffer
Throws: java.io.IOException
public static boolean isBeginElement(int delimiterResult)
public static boolean isEndElement(int delimiterResult)
public static boolean isSameElement(int delimiterResult)
public static boolean isIgnoreNewlines(int delimiterResult)