/* * the input: 3 files, 1 is cluster file, 2 is Go matrix file (lookup table) * 3 is back ground file(pool) * * */ import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.Random; /** * * @author DTian */ public class RawGoID { private ArrayList clusterGeneList; // for the input cluster file private Matrix poolTable; //for the filtered gene pool list private Matrix lookupTable; // for the lookup attribute table private int oriClusterSize; //for the original cluster size private ArrayList oriPoolOrfsName;//for the complete list of pool table // private String randomFilename; public Matrix getLookupTable() { return lookupTable; } public void setLookupTable(Matrix lookupTable) { this.lookupTable = lookupTable; } public Matrix getPoolTable() { return poolTable; } public void setPoolTable(Matrix poolTable) { this.poolTable = poolTable; } public ArrayList getClusterGeneList() { return clusterGeneList; } public void setClusterGeneList(ArrayList clusterGeneList) { this.clusterGeneList = clusterGeneList; } public RawGoID() { clusterGeneList = new ArrayList(); poolTable = new Matrix(); lookupTable = new Matrix(); // randomFilename =""; } public void setOriClusterSize(int oriClusterSize) { this.oriClusterSize = oriClusterSize; } public int getOriClusterSize () { return oriClusterSize; } public void setOriPoolOrfsName(ArrayList oriPoolOrfsName) { this.oriPoolOrfsName = oriPoolOrfsName; } public ArrayList getOriPoolOrfsName() { return oriPoolOrfsName; } /** * * @param clusterFilename : cluster Filename * @param GoMatrixFilename : GoMatrix Filename * @param backGroundFilename : backGround Filename */ public RawGoID(String clusterFilename, String GoMatrixFilename, String backGroundFilename) { try { clusterGeneList = new ArrayList(200); ArrayList refClusterGeneList = new ArrayList (200); // Get the smallGeneList (a cluster ) BufferedReader br = new BufferedReader(new FileReader(clusterFilename)); // strRow is used to read line from file String strRow = ""; while ((strRow = br.readLine()) != null) { clusterGeneList.add(strRow.trim().toLowerCase()); } // System.out.println(clusterGeneList.size()); setOriClusterSize(clusterGeneList.size()); // System.out.println("original cluster size =" + clusterGeneList.size()); // Get the mtrix (lookup table) lookupTable = new Matrix(GoMatrixFilename); // Get the bigGeneList (pool or back ground file) br = new BufferedReader(new FileReader(backGroundFilename)); ArrayList poolOrfsName = new ArrayList(5000); while ((strRow = br.readLine()) != null) { poolOrfsName.add(strRow.trim().toLowerCase()); } this.setOriPoolOrfsName(poolOrfsName); poolTable = new Matrix(); for (int i = 0; i < poolOrfsName.size(); i++) { Object tempKey = poolOrfsName.get(i); if (lookupTable.getMatrix().containsKey(tempKey)) { poolTable.addValue(tempKey, lookupTable.getMatrix().get(tempKey)); } } poolTable.setRowSize(poolTable.getMatrix().size()); poolTable.setColSize(lookupTable.getColSize()); br.close(); // This loop is to take out any ORF from the cluster gene list if not exist in pool table // not necessary if all cluster ORFs are from pool table for (int i=0;i result = new ArrayList(clusterSize); // Jingyu: The following segment of code, which is deactivated, is designed to get the random cluster list from a lookuptable-filtered pooltable // Jingyu: To do so may cause a bias in average of random raw GOid score by using a smaller pool list // get a random cluster with same size of the cluster file and then calculate the Goid // 1, get the random orf names to a ArrayList // HashMap hm = new HashMap(this.getClusterGeneList().size()); // while (hm.keySet().size() < clusterSize) { // hm.put(this.getPoolTable().getOrfNames().get(randInt(this.getPoolTable().getOrfNames().size())), "0"); // } // result.addAll(hm.keySet()); // Get a random cluster with same size of the cluster file from the original ORF pool // Extra step added by Jingyu to remove the ORFs not existing in pooltable; ArrayList localOriPoolTable = new ArrayList(); localOriPoolTable = this.getOriPoolOrfsName(); // Checkpoint // System.out.println(localOriPoolTable.size()); for (int i=0;i