gr.demokritos.iit.jinsect.casc.console
Class casc

java.lang.Object
  extended by gr.demokritos.iit.jinsect.casc.console.casc

public class casc
extends java.lang.Object

Helper class used in the 2006 CASC evaluation (see the Computer-Assisted Stemmatology Challenge).


Field Summary
static java.lang.String AVERAGE_LINK
           
static java.lang.String COMPLETE_LINK
           
static java.lang.String SINGLE_LINK
           
 
Constructor Summary
casc()
           
 
Method Summary
static void addParent(java.util.HashMap hParents, salvo.jesus.graph.Vertex vA, salvo.jesus.graph.Vertex vB, CASCGraph gTree, double dWeightToParent)
           
static double calcCASCDistanceBetween(CASCGraph gCorrect, CASCGraph g2)
           
static double calcDistanceBetween(CASCGraph gCorrect, CASCGraph g2)
           
static double calcDistanceBetween(CASCGraph gCorrect, CASCGraph g2, boolean bSilent)
           
static double euclideanDistance(Distribution p, Distribution q)
           
static CASCGraph getDefaultGraph()
           
static CASCGraph getDummyGraph()
           
static java.util.ListIterator getEdgeIteratorByWeight(UniqueVertexGraph g)
          Returns a list iterator that runs through the edge set of a given graph in ascending order of edge weight.
static CASCGraph getExampleGraph()
           
static java.lang.String getFileNameOnly(java.lang.String sFilePath)
           
static CASCGraph getLDABasedGraph()
          Returns a graph based on LDA analysis of the underlying n-gram frequencies.
static CASCGraph getLevenshteinDistanceGraph(java.lang.String sInputDir)
           
static CASCGraph getNGramDistanceAgglomerativelyClusteredGraph(java.lang.String sInputDir, int iMinNGram, int iMaxNGram, int iDist, double dGraphImportance, java.lang.String sClusteringType)
           
static CASCGraph getNGramDistanceGraph(java.lang.String sInputDir, int iMinNGram, int iMaxNGram, int iDist, double dGraphImportance, boolean bSilent, java.util.TreeMap hLoadedDocs)
           
static CASCGraph getRandomGraph()
           
static java.lang.String graphToCASCDot(CASCGraph gTree)
          Renders a graph to its DOT representation (see GraphViz for more information on the format).
static void main(java.lang.String[] args)
           
static double manhattanDistance(Distribution p, Distribution q)
           
static double MPDDistance(Distribution p, Distribution q)
           
static void testGraphDist()
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

SINGLE_LINK

public static final java.lang.String SINGLE_LINK
See Also:
Constant Field Values

COMPLETE_LINK

public static final java.lang.String COMPLETE_LINK
See Also:
Constant Field Values

AVERAGE_LINK

public static final java.lang.String AVERAGE_LINK
See Also:
Constant Field Values
Constructor Detail

casc

public casc()
Method Detail

main

public static void main(java.lang.String[] args)

getLevenshteinDistanceGraph

public static CASCGraph getLevenshteinDistanceGraph(java.lang.String sInputDir)

getEdgeIteratorByWeight

public static java.util.ListIterator getEdgeIteratorByWeight(UniqueVertexGraph g)
Returns a list iterator that runs through the edge set of a given graph in ascending order of edge weight.

Parameters:
g - The given UniqueVertexGraph.
Returns:
A list iterator on the sorted graph edges.

getNGramDistanceGraph

public static CASCGraph getNGramDistanceGraph(java.lang.String sInputDir,
                                              int iMinNGram,
                                              int iMaxNGram,
                                              int iDist,
                                              double dGraphImportance,
                                              boolean bSilent,
                                              java.util.TreeMap hLoadedDocs)

getNGramDistanceAgglomerativelyClusteredGraph

public static CASCGraph getNGramDistanceAgglomerativelyClusteredGraph(java.lang.String sInputDir,
                                                                      int iMinNGram,
                                                                      int iMaxNGram,
                                                                      int iDist,
                                                                      double dGraphImportance,
                                                                      java.lang.String sClusteringType)

getLDABasedGraph

public static CASCGraph getLDABasedGraph()
Returns a graph based on LDA analysis of the underlying n-gram frequencies. The file CASC/LDAcascOutput.txt is expected as input.


euclideanDistance

public static double euclideanDistance(Distribution p,
                                       Distribution q)
Parameters:
p - the first distribution, as a Distribution.
q - the second distribution, as a Distribution.
Returns:
The Euclidean distance between p and q, viewing the distributions as feature vectors; zero if p and q are equal.

MPDDistance

public static double MPDDistance(Distribution p,
                                 Distribution q)
Parameters:
p - the first distribution, as a Distribution.
q - the second distribution, as a Distribution.
Returns:
The MPD distance between p and q, viewing the distributions as feature vectors; zero if p and q are equal.

manhattanDistance

public static double manhattanDistance(Distribution p,
                                       Distribution q)
Parameters:
p - the first distribution, as a Distribution.
q - the second distribution, as a Distribution.
Returns:
The Manhattan distance between p and q, viewing the distributions as feature vectors; zero if p and q are equal.

addParent

public static void addParent(java.util.HashMap hParents,
                             salvo.jesus.graph.Vertex vA,
                             salvo.jesus.graph.Vertex vB,
                             CASCGraph gTree,
                             double dWeightToParent)

calcCASCDistanceBetween

public static double calcCASCDistanceBetween(CASCGraph gCorrect,
                                             CASCGraph g2)

calcDistanceBetween

public static double calcDistanceBetween(CASCGraph gCorrect,
                                         CASCGraph g2)

calcDistanceBetween

public static double calcDistanceBetween(CASCGraph gCorrect,
                                         CASCGraph g2,
                                         boolean bSilent)

getExampleGraph

public static CASCGraph getExampleGraph()

getDummyGraph

public static CASCGraph getDummyGraph()

getDefaultGraph

public static CASCGraph getDefaultGraph()

getRandomGraph

public static CASCGraph getRandomGraph()

getFileNameOnly

public static java.lang.String getFileNameOnly(java.lang.String sFilePath)

testGraphDist

public static void testGraphDist()

graphToCASCDot

public static java.lang.String graphToCASCDot(CASCGraph gTree)
Renders a graph to its DOT representation (see GraphViz for more information on the format). The DOT output follows the CASC (Stemmatology Challenge) directives.

Parameters:
gTree - The input graph.
Returns:
The DOT formatted string representation of the graph.