Squashed initial commit
qhtcp-workflow/apps/java/weka-clustering/src/Information.java (executable file, 165 lines added)
@@ -0,0 +1,165 @@
/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.util.HashMap;
import java.util.Iterator;

/**
 *
 * @author DTian
 */
public class Information {

    /**
     * Computes the Shannon entropy (base 2) of the distribution of values in data.
     *
     * @param data the array of observed values
     * @return the entropy in bits
     */
    public static double entropy(String[] data) {
        double entropy = 0;

        // Frequency table
        HashMap freqDict = new HashMap();
        int one = 1;

        for (int i = 0; i < data.length; i++) {
            String newkey = data[i];
            if (freqDict.containsKey(newkey)) {
                int val = Integer.parseInt(freqDict.get(newkey).toString());
                freqDict.remove(newkey);
                val = val + 1;
                freqDict.put(newkey, val + "");
            } else {
                freqDict.put(newkey, (one + ""));
            }
        }

        // Probability table
        HashMap probDict = new HashMap();
        Iterator it = freqDict.keySet().iterator();
        String newkey = "";
        while (it.hasNext()) {
            newkey = (String) it.next();
            double value = 0.0;
            value = Double.parseDouble((String) freqDict.get(newkey)) / data.length;
            probDict.put(newkey, value + "");
        }

        // Calculate entropy
        it = probDict.keySet().iterator();
        while (it.hasNext()) {
            newkey = (String) it.next();
            double value = 0.0;
            value = Double.parseDouble((String) probDict.get(newkey));
            entropy = entropy - value * (Math.log(value) / Math.log(2));
        }
        return entropy;
    }
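    // Worked example (illustration only, not from the original callers): for
    // data = {"a", "a", "b", "b"} the frequencies are a:2, b:2, the probabilities
    // are 0.5 each, and entropy(data) = -(0.5*log2(0.5) + 0.5*log2(0.5)) = 1.0 bit.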

    /**
     * Computes a relative entropy (Kullback-Leibler style divergence, base 2)
     * between the empirical distributions of data1 and data2. Note: every value
     * occurring in data1 is assumed to also occur in data2; otherwise
     * probDict2.get(newkey) is null and a NullPointerException is thrown.
     */
    public static double relativeEntropy(String[] data1, String[] data2) {

        double result = 0;
        // System.out.println(data1.length);

        // Frequency table for data1
        HashMap freqDict1 = new HashMap();
        int one = 1;
        for (int i = 0; i < data1.length; i++) {
            Object key = data1[i];
            if (freqDict1.containsKey(key)) {
                int val = Integer.parseInt(freqDict1.get(key).toString());
                //freqDict1.remove(key);
                val++;
                freqDict1.put(key, val + "");
            } else {
                freqDict1.put(key, (one + ""));
            }
        }

        // toFileHM(freqDict1, "FreqDict1.txt");

        // Frequency table for data2
        HashMap freqDict2 = new HashMap();
        for (int i = 0; i < data2.length; i++) {
            Object key = data2[i];
            if (freqDict2.containsKey(key)) {
                int val = Integer.parseInt(freqDict2.get(key).toString());
                //freqDict2.remove(key);
                val++;
                freqDict2.put(key, val + "");
            } else {
                freqDict2.put(key, (one + ""));
            }
        }

        // Probability tables
        HashMap<Object, Object> probDict1 = new HashMap<Object, Object>();
        HashMap<Object, Object> probDict2 = new HashMap<Object, Object>();
        Iterator it = freqDict1.keySet().iterator();
        while (it.hasNext()) {
            Object newkey = it.next();
            double value = 0;
            value = Double.parseDouble((String) freqDict1.get(newkey)) / data1.length;
            probDict1.put(newkey, value + "");
        }

        it = freqDict2.keySet().iterator();
        while (it.hasNext()) {
            Object newkey = it.next();
            double value = 0;
            value = Double.parseDouble((String) freqDict2.get(newkey)) / data2.length;
            probDict2.put(newkey, value + "");
        }

        // Calculate the relative entropy
        it = probDict1.keySet().iterator();
        while (it.hasNext()) {
            Object newkey = it.next();
            Object value1 = probDict1.get(newkey);
            //Object value2 = probDict2.get(newkey);
            double dValue1 = Double.parseDouble(probDict1.get(newkey).toString());
            double dValue2 = Double.parseDouble(probDict2.get(newkey).toString());
            if (value1.toString().trim().compareToIgnoreCase("1.0") == 0) {
                result = result + dValue1 * (Math.log(dValue1 / dValue2) / Math.log(2));
            } else if (value1.toString().trim().compareToIgnoreCase("0") == 0) {
                result = result + (1 - dValue1) * (Math.log((1 - dValue1) / (1 - dValue2)) / Math.log(2));
            } else {
                result = result + dValue1 * (Math.log(dValue1 / dValue2) / Math.log(2));
                result = result + (1 - dValue1) * (Math.log((1 - dValue1) / (1 - dValue2)) / Math.log(2));
            }
            // toFile(result + "", "probDict1.txt");
            // toFile(result, "resultsOfresult.txt"); //check point by Jingyu
        }
        //toFile(probDict1.size() + "*******************", "probDict1.txt");
        //System.out.println("relative entropy = " + result);
        return result;
    }

    private static void toFile(String data, String filename) {

        // Output to file (appending)
        try {
            BufferedWriter writer = new BufferedWriter(new FileWriter(filename, true));
            writer.write(data + "\n");
            writer.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    private static void toFileHM(HashMap data, String filename) {

        // Output to file (appending)
        try {
            BufferedWriter writer = new BufferedWriter(new FileWriter(filename, true));
            for (Object key : data.keySet()) {
                writer.write(key.toString() + ":" + data.get(key) + "\n");
            }
            writer.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
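A minimal sketch of how these utilities might be exercised (the driver class below and its sample arrays are illustrative assumptions, not part of this commit; only Information.entropy and Information.relativeEntropy come from the file above, and relativeEntropy expects every value in the first array to also appear in the second):

    public class InformationDemo {
        public static void main(String[] args) {
            String[] sample1 = {"a", "a", "a", "b"};
            String[] sample2 = {"a", "a", "b", "b"};

            // Shannon entropy of a single column of values
            System.out.println("H(sample2) = " + Information.entropy(sample2));

            // Relative entropy between the two empirical distributions
            System.out.println("D(sample1 || sample2) = "
                    + Information.relativeEntropy(sample1, sample2));
        }
    }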