|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
  gr.demokritos.iit.jinsect.console.summaryEvaluator
public class summaryEvaluator
A class of objects that can evaluate a set of summaries, using n-gram graph representation.
Field Summary | |
---|---|
protected java.lang.Integer |
CharDist
|
protected java.lang.Integer |
CharMax
|
protected java.lang.Integer |
CharMin
|
protected java.lang.String |
Do
|
static java.lang.String |
DO_ALL
Constant defining union (char and word) n-gram method. |
static java.lang.String |
DO_CHARS
Constant defining char n-gram method. |
static java.lang.String |
DO_WORDS
Constant defining word n-gram method. |
protected java.util.Hashtable |
hModelCache
Word n-gram graph representation cache. |
protected java.util.Hashtable |
hNModelCache
Character n-gram graph representation cache. |
protected java.lang.String |
ModelDir
|
protected java.lang.String |
OutFile
|
protected java.util.concurrent.Semaphore |
OutputSemaphore
|
protected java.lang.String |
SummaryDir
|
protected java.lang.Integer |
Threads
|
static int |
USE_DISTRO_AVERAGE_AS_WEIGHT
Constant to use distribution average as edge weight in the n-gram graph. |
static int |
USE_OCCURENCES_AS_WEIGHT
Constant to use co-occurrence cardinality as edge weight in the n-gram graph. |
protected java.lang.Integer |
WeightMethod
|
protected java.lang.Integer |
WordDist
|
protected java.lang.Integer |
WordMax
|
protected java.lang.Integer |
WordMin
|
Constructor Summary | |
---|---|
summaryEvaluator(java.util.concurrent.Semaphore sOutputSemaphore,
java.lang.String sDo,
int iWordMin,
int iWordMax,
int iWordDist,
int iCharMin,
int iCharMax,
int iCharDist,
int iThreads,
java.lang.String sOutFile,
java.lang.String sSummaryDir,
java.lang.String sModelDir,
boolean bSilent,
int iWeightMethod,
boolean bProgress)
Creates a summaryEvaluator object. |
|
summaryEvaluator(java.lang.String[] args)
Creates a summaryEvaluator object, given a command-line like string. |
Method Summary | |
---|---|
protected SimilarityArray |
calcDistroSimilarityMeasures(CategorizedFileEntry cfeCur,
java.util.List dsModelSet,
boolean bOutput,
java.io.PrintStream pOut,
java.util.concurrent.Semaphore sSem,
int WordNGramSize_Min,
int WordNGramSize_Max,
int Word_Dmax,
int CharacterNGramSize_Min,
int CharacterNGramSize_Max,
int Character_Dmax,
boolean bDoCharNGrams,
boolean bDoWordNGrams,
boolean bSilent)
Performs similarity measurement of a CategorizedFileEntry , given a model set. |
protected SimilarityArray |
calcSimilarityMeasures(CategorizedFileEntry cfeCur,
java.util.List dsModelSet,
boolean bOutput,
java.io.PrintStream pOut,
java.util.concurrent.Semaphore sSem,
int WordNGramSize_Min,
int WordNGramSize_Max,
int Word_Dmax,
int CharacterNGramSize_Min,
int CharacterNGramSize_Max,
int Character_Dmax,
boolean bDoCharNGrams,
boolean bDoWordNGrams,
boolean bSilent)
Performs similarity measurement of a CategorizedFileEntry , given a model set. |
protected SimilarityArray |
calcSimilarityMeasures(CategorizedFileEntry cfeCur,
java.util.List dsModelSet,
boolean bOutput,
java.util.concurrent.Semaphore sSem)
Performs similarity measurement of a CategorizedFileEntry , given a model set. |
protected void |
doNormalEval(java.util.concurrent.Semaphore sSem,
java.io.PrintStream pOverallResultsOutStream,
int WordNGramSize_Min,
int WordNGramSize_Max,
int Word_Dmax,
int CharacterNGramSize_Min,
int CharacterNGramSize_Max,
int Character_Dmax,
boolean bDoCharNGrams,
boolean bDoWordNGrams,
java.lang.String sSummaryDir,
java.lang.String sModelDir,
int iThreads,
boolean bSilent,
boolean bProgress)
TODO |
protected void |
doOptimizedEval(java.util.concurrent.Semaphore sSem,
java.io.PrintStream pOverallResultsOutStream,
int WordNGramSize_Min,
int WordNGramSize_Max,
int Word_Dmax,
int CharacterNGramSize_Min,
int CharacterNGramSize_Max,
int Character_Dmax,
boolean bDoCharNGrams,
boolean bDoWordNGrams,
java.lang.String sSummaryDir,
java.lang.String sModelDir,
int iThreads,
boolean bSilent,
boolean bProgress)
Performs optimized evaluation of a given set of summaries, given a model directory. |
static void |
main(java.lang.String[] args)
|
void |
run()
Performs the evaluation step in a thread-safe way. |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
public static final int USE_DISTRO_AVERAGE_AS_WEIGHT
public static final int USE_OCCURENCES_AS_WEIGHT
public static final java.lang.String DO_WORDS
public static final java.lang.String DO_CHARS
public static final java.lang.String DO_ALL
protected java.lang.Integer WordMin
protected java.lang.Integer WordMax
protected java.lang.Integer WordDist
protected java.lang.Integer CharMin
protected java.lang.Integer CharMax
protected java.lang.Integer CharDist
protected java.lang.Integer Threads
protected java.lang.Integer WeightMethod
protected java.lang.String OutFile
protected java.lang.String SummaryDir
protected java.lang.String ModelDir
protected java.lang.String Do
protected java.util.concurrent.Semaphore OutputSemaphore
protected java.util.Hashtable hModelCache
protected java.util.Hashtable hNModelCache
Constructor Detail |
---|
public summaryEvaluator(java.util.concurrent.Semaphore sOutputSemaphore, java.lang.String sDo, int iWordMin, int iWordMax, int iWordDist, int iCharMin, int iCharMax, int iCharDist, int iThreads, java.lang.String sOutFile, java.lang.String sSummaryDir, java.lang.String sModelDir, boolean bSilent, int iWeightMethod, boolean bProgress)
Parameters:
sOutputSemaphore - A semaphore that ensures that the output is provided consistently.
sDo - The method of evaluation (see DO_WORDS, DO_CHARS, DO_ALL).
iWordMin - The min word n-gram rank to take into account, if applicable to the method.
iWordMax - The max word n-gram rank to take into account, if applicable to the method.
iWordDist - The word n-gram neighbourhood distance to use, if applicable to the method.
iCharMin - The min char n-gram rank to take into account, if applicable to the method.
iCharMax - The max char n-gram rank to take into account, if applicable to the method.
iCharDist - The char n-gram neighbourhood distance to use, if applicable to the method.
iThreads - The number of threads to use, for multi-threaded processing.
sOutFile - The file to output results.
sSummaryDir - The peer summary base directory.
sModelDir - The model summaries base directory.
bSilent - If true, no debug messages are output.
iWeightMethod - The method to use for weighting edges in the n-gram graph. See USE_DISTRO_AVERAGE_AS_WEIGHT, USE_OCCURENCES_AS_WEIGHT.
bProgress - If true, indicates that progress indication should be output, even in silent mode.
public summaryEvaluator(java.lang.String[] args)
Parameters:
args - An array of strings, corresponding to command-line parsed parameters.
Method Detail |
---|
public static void main(java.lang.String[] args)
Parameters:
args - The command-line arguments.
public void run()
Specified by: run in interface java.lang.Runnable
protected SimilarityArray calcSimilarityMeasures(CategorizedFileEntry cfeCur, java.util.List dsModelSet, boolean bOutput, java.util.concurrent.Semaphore sSem)
Performs similarity measurement of a CategorizedFileEntry, given a model set. It
uses default values for the n-gram graphs' parameters, performing a test for both word and
character n-grams.
Parameters:
cfeCur - The current file to compare to models.
dsModelSet - The input model set.
bOutput - If true, output is verbose.
sSem - The semaphore to use to ensure that output is consistent and thread-safe.
Returns: A SimilarityArray containing similarity values for the given file.
protected SimilarityArray calcSimilarityMeasures(CategorizedFileEntry cfeCur, java.util.List dsModelSet, boolean bOutput, java.io.PrintStream pOut, java.util.concurrent.Semaphore sSem, int WordNGramSize_Min, int WordNGramSize_Max, int Word_Dmax, int CharacterNGramSize_Min, int CharacterNGramSize_Max, int Character_Dmax, boolean bDoCharNGrams, boolean bDoWordNGrams, boolean bSilent)
Performs similarity measurement of a CategorizedFileEntry, given a model set.
Parameters:
cfeCur - The current file to compare to models.
dsModelSet - The input model set.
bOutput - If true, output is verbose.
pOut - The PrintStream to use for output.
sSem - The semaphore to use to ensure that output is consistent and thread-safe.
WordNGramSize_Min - The min word n-gram rank to use in the representation.
WordNGramSize_Max - The max word n-gram rank to use in the representation.
Word_Dmax - The max neighbourhood distance to use in the word n-gram graph representation.
CharacterNGramSize_Min - The min character n-gram rank to use in the representation.
CharacterNGramSize_Max - The max character n-gram rank to use in the representation.
Character_Dmax - The max neighbourhood distance to use in the character n-gram graph representation.
bDoCharNGrams - If true, performs character n-gram comparison. Can be used together with bDoWordNGrams.
bDoWordNGrams - If true, performs word n-gram comparison. Can be used together with bDoCharNGrams.
bSilent - If true, no debugging information is displayed.
Returns: A SimilarityArray containing similarity values for the given file.
protected SimilarityArray calcDistroSimilarityMeasures(CategorizedFileEntry cfeCur, java.util.List dsModelSet, boolean bOutput, java.io.PrintStream pOut, java.util.concurrent.Semaphore sSem, int WordNGramSize_Min, int WordNGramSize_Max, int Word_Dmax, int CharacterNGramSize_Min, int CharacterNGramSize_Max, int Character_Dmax, boolean bDoCharNGrams, boolean bDoWordNGrams, boolean bSilent)
Performs similarity measurement of a CategorizedFileEntry, given a model set.
Parameters:
cfeCur - The current file to compare to models.
dsModelSet - The input model set.
bOutput - If true, output is verbose.
pOut - The PrintStream to use for output.
sSem - The semaphore to use to ensure that output is consistent and thread-safe.
WordNGramSize_Min - The min word n-gram rank to use in the representation.
WordNGramSize_Max - The max word n-gram rank to use in the representation.
Word_Dmax - The max neighbourhood distance to use in the word n-gram graph representation.
CharacterNGramSize_Min - The min character n-gram rank to use in the representation.
CharacterNGramSize_Max - The max character n-gram rank to use in the representation.
Character_Dmax - The max neighbourhood distance to use in the character n-gram graph representation.
bDoCharNGrams - If true, performs character n-gram comparison. Can be used together with bDoWordNGrams.
bDoWordNGrams - If true, performs word n-gram comparison. Can be used together with bDoCharNGrams.
bSilent - If true, no debugging information is displayed.
Returns: A SimilarityArray containing similarity values for the given file.
protected void doOptimizedEval(java.util.concurrent.Semaphore sSem, java.io.PrintStream pOverallResultsOutStream, int WordNGramSize_Min, int WordNGramSize_Max, int Word_Dmax, int CharacterNGramSize_Min, int CharacterNGramSize_Max, int Character_Dmax, boolean bDoCharNGrams, boolean bDoWordNGrams, java.lang.String sSummaryDir, java.lang.String sModelDir, int iThreads, boolean bSilent, boolean bProgress) throws java.lang.Exception
Parameters:
sSem - A semaphore that ensures that the output is provided consistently.
pOverallResultsOutStream - The output stream for results.
WordNGramSize_Min - The min word n-gram rank to take into account, if applicable to the method.
WordNGramSize_Max - The max word n-gram rank to take into account, if applicable to the method.
Word_Dmax - The word n-gram neighbourhood distance to use, if applicable to the method.
CharacterNGramSize_Min - The min char n-gram rank to take into account, if applicable to the method.
CharacterNGramSize_Max - The max char n-gram rank to take into account, if applicable to the method.
Character_Dmax - The char n-gram neighbourhood distance to use, if applicable to the method.
bDoCharNGrams - If true, char n-grams evaluation is performed.
bDoWordNGrams - If true, word n-grams evaluation is performed.
sSummaryDir - The peer summary base directory.
sModelDir - The model summaries base directory.
bSilent - If true, no debug messages are output.
bProgress - If true, indicates that progress indication should be output, even in silent mode.
Throws:
java.lang.Exception
protected void doNormalEval(java.util.concurrent.Semaphore sSem, java.io.PrintStream pOverallResultsOutStream, int WordNGramSize_Min, int WordNGramSize_Max, int Word_Dmax, int CharacterNGramSize_Min, int CharacterNGramSize_Max, int Character_Dmax, boolean bDoCharNGrams, boolean bDoWordNGrams, java.lang.String sSummaryDir, java.lang.String sModelDir, int iThreads, boolean bSilent, boolean bProgress)
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |