Squashed initial commit
This commit is contained in:
155
qhtcp-workflow/apps/java/weka-clustering/src/SGD2AttrTable.java
Executable file
155
qhtcp-workflow/apps/java/weka-clustering/src/SGD2AttrTable.java
Executable file
@@ -0,0 +1,155 @@
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.FileReader;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
|
||||
|
||||
/*
|
||||
* This program first creates an intermediate table and then loads the function from Dr. Brett McKinney to create the attribute table.
|
||||
*/
|
||||
|
||||
/**
 * Converts a tab-delimited SGD annotation file into a gene-to-GO-term table.
 *
 * <p>Every output line has the form {@code GENE<TAB>id1<TAB>id2...}. The gene name is taken
 * from the 11th input column (text before the first '|', upper-cased); each id is the value
 * of the 5th input column with the "GO:" prefix and leading zeros removed.
 *
 * @author DTian
 */
public class SGD2AttrTable {

    /** Zero-based index of the column holding the GO identifier, e.g. "GO:0005739". */
    private static final int GO_ID_COLUMN = 4;

    /** Zero-based index of the column holding the gene name, optionally followed by "|...". */
    private static final int GENE_COLUMN = 10;

    /**
     * Reads {@code inputFile}, groups the GO ids by gene and appends the result to
     * {@code outputFile}.
     *
     * <p>I/O failures are reported on standard output and otherwise ignored, so this method
     * never throws a checked exception.
     *
     * @param inputFile path of the tab-delimited annotation file; its first line is skipped
     * @param outputFile path of the file the table is appended to (created if missing)
     */
    public void createIntermediateTable(String inputFile, String outputFile) {
        convert(inputFile, outputFile, "createIntermediateTable");
    }

    /**
     * Performs the same conversion as {@link #createIntermediateTable(String, String)}.
     *
     * @param intermediaFile path of the tab-delimited input file; its first line is skipped
     * @param outputFile path of the file the table is appended to (created if missing)
     */
    public void createAttrTable(String intermediaFile, String outputFile) {
        convert(intermediaFile, outputFile, "createAttrTable");
    }

    /** Runs the read-then-append pipeline and reports any I/O failure under the caller's name. */
    private void convert(String inputFile, String outputFile, String callerName) {
        try {
            // Read everything first so an unreadable input never touches the output file.
            HashMap<String, String> geneToGo = readGeneToGo(inputFile);
            appendTable(geneToGo, outputFile);
        } catch (IOException e) {
            // Best-effort by design: callers are not expected to handle I/O errors.
            System.out.println("IOException error in 'class SGD2AttrTable, method "
                    + callerName + "': " + e.getMessage());
        }
    }

    /** Parses the input file into a map from gene name to tab-joined GO ids. */
    private HashMap<String, String> readGeneToGo(String inputFile) throws IOException {
        HashMap<String, String> geneToGo = new HashMap<String, String>();
        try (BufferedReader reader = new BufferedReader(new FileReader(inputFile))) {
            reader.readLine(); // the first row is a header and is always skipped

            String line;
            while ((line = reader.readLine()) != null) {
                String row = line.trim();
                // Only rows starting with 'S' are data; blank lines and comments are skipped.
                if (row.isEmpty() || row.charAt(0) != 'S') {
                    continue;
                }
                String[] columns = row.split("\\t");
                if (columns.length <= GENE_COLUMN) {
                    continue; // truncated row: the gene column is missing
                }
                String gene = toKey(columns[GENE_COLUMN]);
                if (gene.isEmpty()) {
                    continue;
                }
                String goId = toValue(columns[GO_ID_COLUMN]);
                if (goId.isEmpty()) {
                    continue; // nothing to record for this row
                }
                String existing = geneToGo.get(gene);
                geneToGo.put(gene, existing == null ? goId : existing + "\t" + goId);
            }
        }
        return geneToGo;
    }

    /** Appends one {@code gene<TAB>ids} line per map entry to the output file. */
    private void appendTable(HashMap<String, String> geneToGo, String outputFile)
            throws IOException {
        // The second constructor argument opens the file in append mode.
        try (OutputStreamWriter writer =
                new OutputStreamWriter(new FileOutputStream(new File(outputFile), true))) {
            for (String gene : geneToGo.keySet()) {
                writer.write(gene + "\t" + geneToGo.get(gene) + "\n");
            }
        }
    }

    /**
     * Strips the "GO:" prefix and the leading zeros from a GO identifier.
     *
     * @param raw a GO identifier such as "GO:0005739"
     * @return the identifier without prefix and leading zeros, e.g. "5739"; an all-zero
     *         identifier yields "0" and an empty identifier yields ""
     */
    private String toValue(String raw) {
        String result = raw.trim().toUpperCase(java.util.Locale.ROOT);
        if (result.startsWith("GO:")) {
            result = result.substring(3);
        }
        // Stop one character early so an all-zero id keeps a single digit.
        int firstKept = 0;
        while (firstKept < result.length() - 1 && result.charAt(firstKept) == '0') {
            firstKept++;
        }
        return result.substring(firstKept);
    }

    /**
     * Extracts the gene name from a column value.
     *
     * @param raw a value such as "YAL001C|TFC3"
     * @return the upper-cased text before the first '|', or the whole upper-cased value when
     *         there is no '|'
     */
    private String toKey(String raw) {
        String result = raw.toUpperCase(java.util.Locale.ROOT);
        int end = result.indexOf('|');
        return end < 0 ? result : result.substring(0, end);
    }
}
|
||||
Reference in New Issue
Block a user