public class NGrams
extends java.lang.Object
Modifier and Type | Field and Description |
---|---|
protected java.util.List<de.ims.icarus.language.dependency.DependencySentenceData> |
corpus |
protected int |
fringeSize |
protected java.util.Map<java.lang.String,java.util.ArrayList<ItemInNuclei>> |
nGramCache |
protected int |
nGramCount |
protected int |
nGramLimit |
protected de.ims.icarus.util.Options |
options |
protected java.util.List<NGramQAttributes> |
queryList |
protected boolean |
useFringe |
protected boolean |
useNumberWildcard |
Constructor and Description |
---|
NGrams() |
NGrams(int nGramCount,
de.ims.icarus.util.Options options,
de.ims.icarus.search_tools.Search search) |
NGrams(de.ims.icarus.util.Options options,
java.util.List<NGramQAttributes> queryList,
de.ims.icarus.search_tools.Search search)
Constructor for NGramSearch (main plugin)
|
Modifier and Type | Method and Description |
---|---|
void |
cleanUpNucleus() |
protected java.lang.String |
ensureValid(java.lang.String input) |
static NGrams |
getInstance() |
int |
getMiningMode() |
int |
getPasses() |
java.util.Map<java.lang.String,java.util.ArrayList<ItemInNuclei>> |
getResult() |
java.lang.String |
getTag(de.ims.icarus.language.dependency.DependencySentenceData dd,
int wordIndex,
int mode) |
protected java.lang.String |
getTagQuery(java.lang.String qtag) |
java.util.List<java.lang.String> |
getVariationForTag(java.lang.String tag) |
void |
initializeUniGrams(de.ims.icarus.language.dependency.DependencySentenceData dd,
int sentenceNr)
Step 1) Initialize Corpus / Create uniGrams
Loop through the Corpus and add all occurring Words with their specific PoSTags
to the nGramCache.
|
protected java.util.ArrayList<java.lang.Integer> |
involvedSentences(java.lang.String key) |
protected boolean |
isNuclei(java.lang.String key) |
protected boolean |
isNucleiList(java.lang.String key,
java.util.ArrayList<java.lang.Integer> arrayList) |
java.util.List<java.lang.String> |
nGramPoSFilter(java.util.Map<java.lang.String,java.util.ArrayList<ItemInNuclei>> outputNGram,
int filter)
Possible extension: a filter for dependency structure
(show error pos + dependency).
|
void |
nGramResults()
Prints out the resulting nGrams.
|
protected void |
nGramResults(java.util.Map<java.lang.String,java.util.ArrayList<ItemInNuclei>> inputNGram) |
void |
outputToFile() |
protected SentenceInfo |
returnSentenceInfoNREqual(java.util.ArrayList<ItemInNuclei> l1,
int sentenceNR) |
void |
setMiningMode(int miningMode) |
protected int nGramCount
protected int fringeSize
protected int nGramLimit
protected boolean useFringe
protected boolean useNumberWildcard
protected java.util.Map<java.lang.String,java.util.ArrayList<ItemInNuclei>> nGramCache
protected java.util.List<NGramQAttributes> queryList
protected de.ims.icarus.util.Options options
protected java.util.List<de.ims.icarus.language.dependency.DependencySentenceData> corpus
public NGrams()
public NGrams(de.ims.icarus.util.Options options, java.util.List<NGramQAttributes> queryList, de.ims.icarus.search_tools.Search search)
options -
queryList -
nGramSearch -
search -
public NGrams(int nGramCount, de.ims.icarus.util.Options options, de.ims.icarus.search_tools.Search search)
public static NGrams getInstance()
public int getMiningMode()
public void setMiningMode(int miningMode)
miningMode
- the miningMode to set
protected java.lang.String ensureValid(java.lang.String input)
protected SentenceInfo returnSentenceInfoNREqual(java.util.ArrayList<ItemInNuclei> l1, int sentenceNR)
l1 -
sentenceNR -
public java.util.List<java.lang.String> getVariationForTag(java.lang.String tag)
public void initializeUniGrams(de.ims.icarus.language.dependency.DependencySentenceData dd, int sentenceNr)
dd -
sentenceNr -
public java.lang.String getTag(de.ims.icarus.language.dependency.DependencySentenceData dd, int wordIndex, int mode)
protected boolean isNuclei(java.lang.String key)
protected java.util.ArrayList<java.lang.Integer> involvedSentences(java.lang.String key)
protected boolean isNucleiList(java.lang.String key, java.util.ArrayList<java.lang.Integer> arrayList)
protected java.lang.String getTagQuery(java.lang.String qtag)
public java.util.Map<java.lang.String,java.util.ArrayList<ItemInNuclei>> getResult()
public int getPasses()
public void outputToFile()
Throws: javax.xml.parsers.ParserConfigurationException
public void cleanUpNucleus()
protected void nGramResults(java.util.Map<java.lang.String,java.util.ArrayList<ItemInNuclei>> inputNGram)
public void nGramResults()
Throws: java.lang.InterruptedException
public java.util.List<java.lang.String> nGramPoSFilter(java.util.Map<java.lang.String,java.util.ArrayList<ItemInNuclei>> outputNGram, int filter)
outputNGram -
filter -