public class TrecDocMaker extends BasicDocMaker
Config properties:
| Modifier and Type | Field and Description |
|---|---|
| protected java.io.File | dataDir |
| protected java.lang.ThreadLocal | dateFormat |
| protected java.util.ArrayList | inputFiles |
| protected int | iteration |
| protected int | nextFile |
| protected java.io.BufferedReader | reader |
Fields inherited from class BasicDocMaker: BODY_FIELD, BYTES_FIELD, config, DATE_FIELD, forever, ID_FIELD, indexVal, NAME_FIELD, storeVal, termVecVal, TITLE_FIELD

| Constructor and Description |
|---|
| TrecDocMaker() |
| Modifier and Type | Method and Description |
|---|---|
| protected void | closeInputs() |
| protected java.text.DateFormat | getDateFormat(int n) |
| protected DocData | getNextDocData() — Return the data of the next document. |
| int | numUniqueTexts() — Return how many real unique texts are available, 0 if not applicable. |
| protected void | openNextFile() |
| protected java.util.Date | parseDate(java.lang.String dateStr) |
| protected java.lang.StringBuffer | read(java.lang.String prefix, java.lang.StringBuffer sb, boolean collectMatchLine, boolean collectAll) |
| void | resetInputs() — Reset inputs so that the test run would behave, input-wise, as if it just started. |
| void | setConfig(Config config) — Set the properties. |
Methods inherited from class BasicDocMaker: addBytes, addUniqueBytes, collectFiles, getByteCount, getCount, getHtmlParser, makeDocument, makeDocument, numUniqueBytes, printDocStatistics, resetUniqueBytes, setHTMLParser

protected java.lang.ThreadLocal dateFormat
protected java.io.File dataDir
protected java.util.ArrayList inputFiles
protected int nextFile
protected int iteration
protected java.io.BufferedReader reader
public void setConfig(Config config)
Description copied from interface: DocMaker
Specified by: setConfig in interface DocMaker
Overrides: setConfig in class BasicDocMaker

protected void openNextFile()
throws NoMoreDataException,
java.lang.Exception
Throws:
NoMoreDataException
java.lang.Exception

protected void closeInputs()
protected java.lang.StringBuffer read(java.lang.String prefix,
java.lang.StringBuffer sb,
boolean collectMatchLine,
boolean collectAll)
throws java.lang.Exception
Throws:
java.lang.Exception

protected DocData getNextDocData() throws NoMoreDataException, java.lang.Exception
Description copied from class: BasicDocMaker
Specified by: getNextDocData in class BasicDocMaker
Throws:
NoMoreDataException - if data is exhausted (and 'forever' is set to false).
java.lang.Exception

protected java.text.DateFormat getDateFormat(int n)
protected java.util.Date parseDate(java.lang.String dateStr)
public void resetInputs()
Description copied from interface: DocMaker
Specified by: resetInputs in interface DocMaker
Overrides: resetInputs in class BasicDocMaker

public int numUniqueTexts()
Description copied from interface: DocMaker

Copyright © 2000-2014 Apache Software Foundation. All Rights Reserved.