Information.java 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. /*
  2. * To change this template, choose Tools | Templates
  3. * and open the template in the editor.
  4. */
  5. import java.io.BufferedWriter;
  6. import java.io.FileWriter;
  7. import java.util.HashMap;
  8. import java.util.Iterator;
  /**
   * Information-theoretic measures computed over arrays of categorical (string) samples.
   *
   * <p>Every distinct string in an input array is treated as one symbol and its probability is
   * estimated as {@code occurrences / array length}. All logarithms are base 2, so results are
   * expressed in bits.
   *
   * @author DTian
   */
  public class Information {

      /**
       * Computes the Shannon entropy, in bits, of the empirical distribution of {@code data}.
       *
       * <p>{@code null} elements are allowed and are counted as one symbol of their own.
       *
       * @param data the observed samples
       * @return the entropy in bits; {@code 0} when {@code data} is empty or holds a single
       *         distinct symbol
       * @throws NullPointerException if {@code data} is {@code null}
       */
      public static double entropy(String[] data) {
          double entropy = 0;
          for (Integer count : countOccurrences(data).values()) {
              double p = count.intValue() / (double) data.length;
              entropy -= p * log2(p);
          }
          return entropy;
      }

      /**
       * Computes a relative-entropy style divergence, in bits, between the empirical
       * distributions of {@code data1} and {@code data2}.
       *
       * <p>For every symbol occurring in {@code data1}, with probability {@code p} in
       * {@code data1} and {@code q} in {@code data2}, the method adds the divergence between the
       * two-outcome distributions {@code (p, 1 - p)} and {@code (q, 1 - q)}:
       * {@code p * log2(p / q) + (1 - p) * log2((1 - p) / (1 - q))}. Note that this is a sum of
       * per-symbol binary divergences, not the single sum {@code p * log2(p / q)} over symbols.
       *
       * <p>A symbol that never occurs in {@code data2} has {@code q = 0}, and a symbol that makes
       * up all of {@code data2} has {@code 1 - q = 0}; in both situations the corresponding term
       * is unbounded and the method returns {@link Double#POSITIVE_INFINITY}.
       *
       * @param data1 samples of the reference distribution; only its symbols are visited
       * @param data2 samples of the distribution compared against {@code data1}
       * @return the accumulated divergence in bits; {@code 0} when {@code data1} is empty
       * @throws NullPointerException if either array is {@code null}
       */
      public static double relativeEntropy(String[] data1, String[] data2) {
          HashMap<String, Integer> counts1 = countOccurrences(data1);
          HashMap<String, Integer> counts2 = countOccurrences(data2);

          double result = 0;
          for (String symbol : counts1.keySet()) {
              int count1 = counts1.get(symbol).intValue();
              Integer count2 = counts2.get(symbol);

              double p = count1 / (double) data1.length;
              // A symbol absent from data2 has probability 0 there; the division below then
              // yields +Infinity instead of failing on a missing map entry.
              double q = (count2 == null) ? 0.0 : count2.intValue() / (double) data2.length;

              result += p * log2(p / q);
              // When the symbol makes up all of data1, (1 - p) is 0 and the complementary term
              // contributes nothing; skipping it avoids evaluating 0 * log2(0), which is NaN.
              if (count1 != data1.length) {
                  result += (1 - p) * log2((1 - p) / (1 - q));
              }
          }
          return result;
      }

      /**
       * Builds a frequency table mapping each distinct element of {@code data} to the number of
       * times it occurs.
       *
       * @param data the samples to count; may contain {@code null} elements
       * @return a new map from symbol to occurrence count
       */
      private static HashMap<String, Integer> countOccurrences(String[] data) {
          HashMap<String, Integer> counts = new HashMap<String, Integer>();
          for (String symbol : data) {
              Integer seen = counts.get(symbol);
              counts.put(symbol, seen == null ? 1 : seen + 1);
          }
          return counts;
      }

      /**
       * Returns the base-2 logarithm of {@code x}.
       *
       * @param x the argument
       * @return {@code Math.log(x) / Math.log(2)}
       */
      private static double log2(double x) {
          return Math.log(x) / Math.log(2);
      }

      /**
       * Debugging helper: appends {@code data} followed by a newline to the named file.
       *
       * @param data the text to append
       * @param filename the path of the file to append to
       */
      private static void toFile(String data, String filename) {
          appendToFile(data + "\n", filename);
      }

      /**
       * Debugging helper: appends every entry of {@code data} to the named file, one
       * {@code key:value} pair per line.
       *
       * @param data the map to dump; must not be {@code null}
       * @param filename the path of the file to append to
       */
      private static void toFileHM(HashMap<?, ?> data, String filename) {
          StringBuilder text = new StringBuilder();
          for (Object key : data.keySet()) {
              text.append(key).append(':').append(data.get(key)).append('\n');
          }
          appendToFile(text.toString(), filename);
      }

      /**
       * Appends {@code text} to the named file, always closing the writer afterwards.
       *
       * <p>Failures are reported on {@code System.err} and otherwise ignored, because this is
       * only used for best-effort debugging output.
       *
       * @param text the text to append
       * @param filename the path of the file to append to
       */
      private static void appendToFile(String text, String filename) {
          BufferedWriter writer = null;
          try {
              writer = new BufferedWriter(new FileWriter(filename, true));
              writer.write(text);
          } catch (Exception e) {
              System.err.println("Failed to write to " + filename + ": " + e);
          } finally {
              if (writer != null) {
                  try {
                      writer.close();
                  } catch (Exception e) {
                      System.err.println("Failed to close " + filename + ": " + e);
                  }
              }
          }
      }
  }