gr.demokritos.iit.jinsect.casc.console
Class casc
java.lang.Object
gr.demokritos.iit.jinsect.casc.console.casc
public class casc
- extends java.lang.Object
Helper class used in the 2006 CASC evaluation (see the
Computer-Assisted Stemmatology Challenge).
Constructor Summary |
casc()
|
Method Summary |
static void |
addParent(java.util.HashMap hParents,
salvo.jesus.graph.Vertex vA,
salvo.jesus.graph.Vertex vB,
CASCGraph gTree,
double dWeightToParent)
|
static double |
calcCASCDistanceBetween(CASCGraph gCorrect,
CASCGraph g2)
|
static double |
calcDistanceBetween(CASCGraph gCorrect,
CASCGraph g2)
|
static double |
calcDistanceBetween(CASCGraph gCorrect,
CASCGraph g2,
boolean bSilent)
|
static double |
euclideanDistance(Distribution p,
Distribution q)
|
static CASCGraph |
getDefaultGraph()
|
static CASCGraph |
getDummyGraph()
|
static java.util.ListIterator |
getEdgeIteratorByWeight(UniqueVertexGraph g)
Returns a list iterator object that runs through the edge set of a given graph
in ascending order of edge weight. |
static CASCGraph |
getExampleGraph()
|
static java.lang.String |
getFileNameOnly(java.lang.String sFilePath)
|
static CASCGraph |
getLDABasedGraph()
Returns a graph based on LDA analysis of the underlying n-gram frequencies. |
static CASCGraph |
getLevenshteinDistanceGraph(java.lang.String sInputDir)
|
static CASCGraph |
getNGramDistanceAgglomerativelyClusteredGraph(java.lang.String sInputDir,
int iMinNGram,
int iMaxNGram,
int iDist,
double dGraphImportance,
java.lang.String sClusteringType)
|
static CASCGraph |
getNGramDistanceGraph(java.lang.String sInputDir,
int iMinNGram,
int iMaxNGram,
int iDist,
double dGraphImportance,
boolean bSilent,
java.util.TreeMap hLoadedDocs)
|
static CASCGraph |
getRandomGraph()
|
static java.lang.String |
graphToCASCDot(CASCGraph gTree)
Renders a graph to its DOT representation (See GraphViz for more info on the format). |
static void |
main(java.lang.String[] args)
|
static double |
manhattanDistance(Distribution p,
Distribution q)
|
static double |
MPDDistance(Distribution p,
Distribution q)
|
static void |
testGraphDist()
|
Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
SINGLE_LINK
public static final java.lang.String SINGLE_LINK
- See Also:
- Constant Field Values
COMPLETE_LINK
public static final java.lang.String COMPLETE_LINK
- See Also:
- Constant Field Values
AVERAGE_LINK
public static final java.lang.String AVERAGE_LINK
- See Also:
- Constant Field Values
casc
public casc()
main
public static void main(java.lang.String[] args)
getLevenshteinDistanceGraph
public static CASCGraph getLevenshteinDistanceGraph(java.lang.String sInputDir)
getEdgeIteratorByWeight
public static java.util.ListIterator getEdgeIteratorByWeight(UniqueVertexGraph g)
- Returns a list iterator object that runs through the edge set of a given graph
in ascending order of edge weight.
- Parameters:
g - The given UniqueVertexGraph.
- Returns:
- A list iterator on the sorted graph edges.
getNGramDistanceGraph
public static CASCGraph getNGramDistanceGraph(java.lang.String sInputDir,
int iMinNGram,
int iMaxNGram,
int iDist,
double dGraphImportance,
boolean bSilent,
java.util.TreeMap hLoadedDocs)
getNGramDistanceAgglomerativelyClusteredGraph
public static CASCGraph getNGramDistanceAgglomerativelyClusteredGraph(java.lang.String sInputDir,
int iMinNGram,
int iMaxNGram,
int iDist,
double dGraphImportance,
java.lang.String sClusteringType)
getLDABasedGraph
public static CASCGraph getLDABasedGraph()
- Returns a graph based on LDA analysis of the underlying n-gram frequencies.
The file CASC/LDAcascOutput.txt is expected as input.
euclideanDistance
public static double euclideanDistance(Distribution p,
Distribution q)
- Parameters:
p - the first distribution, as a Distribution.
q - the second distribution, as a Distribution.
- Returns:
- the Euclidean distance between p and q, viewing the distributions
as feature vectors; zero if q and p are equal.
MPDDistance
public static double MPDDistance(Distribution p,
Distribution q)
- Parameters:
p - the first distribution, as a Distribution.
q - the second distribution, as a Distribution.
- Returns:
- the MPD distance between p and q, viewing the distributions
as feature vectors; zero if q and p are equal.
manhattanDistance
public static double manhattanDistance(Distribution p,
Distribution q)
- Parameters:
p - the first distribution, as a Distribution.
q - the second distribution, as a Distribution.
- Returns:
- the Manhattan distance between p and q, viewing the distributions
as feature vectors; zero if q and p are equal.
addParent
public static void addParent(java.util.HashMap hParents,
salvo.jesus.graph.Vertex vA,
salvo.jesus.graph.Vertex vB,
CASCGraph gTree,
double dWeightToParent)
calcCASCDistanceBetween
public static double calcCASCDistanceBetween(CASCGraph gCorrect,
CASCGraph g2)
calcDistanceBetween
public static double calcDistanceBetween(CASCGraph gCorrect,
CASCGraph g2)
calcDistanceBetween
public static double calcDistanceBetween(CASCGraph gCorrect,
CASCGraph g2,
boolean bSilent)
getExampleGraph
public static CASCGraph getExampleGraph()
getDummyGraph
public static CASCGraph getDummyGraph()
getDefaultGraph
public static CASCGraph getDefaultGraph()
getRandomGraph
public static CASCGraph getRandomGraph()
getFileNameOnly
public static java.lang.String getFileNameOnly(java.lang.String sFilePath)
testGraphDist
public static void testGraphDist()
graphToCASCDot
public static java.lang.String graphToCASCDot(CASCGraph gTree)
- Renders a graph to its DOT representation (see GraphViz for more information on the format). The DOT
file follows the CASC (Computer-Assisted Stemmatology Challenge) directives.
- Parameters:
gTree
- The input graph.
- Returns:
- The DOT formatted string representation of the graph.