#JoinInteractExps3dev.R #User prompt for std Value cat("Enter a Standard Deviation value to filter data to be used by REMc ?\n") inp <- readLines(file("stdin"), n = 1L) cat(paste("Standard Deviation Value is", inp, "\n")) inp<- as.numeric(inp) #set std deviation multiplier default if no user entry if(is.double(inp) & !is.na(inp)){ std= inp }else{std= 2} #The input files should be entered in order from the greatest number of rows(Orfs) to least. #Args <- commandArgs(TRUE) #if(length(Args)==0){ # std=0 #}else{ # std=Args[1] #} print(paste("SD=",std)) #Wstudy= getwd() #dir.create("../JoinFiles") #(paste0(Wstudy,"/JoinFiles")) #outDir <- "../JoinFiles" #paste0(Wstudy,"/JoinFiles") #dir.create("../REMc") #(paste0(Wstudy,"/JoinFiles")) outDir <- "./" #../REMc" #paste0(Wstudy,"/JoinFiles") print(outDir) #Args= 'asdf' #Args[1]= "../Exp1/ZScores/ZScores_Interaction.csv" #paste0(Wstudy,"/Exp1/ZScores/ZScores_Interaction.csv") #Args[2]= "../Exp2/ZScores/ZScores_Interaction.csv" #paste0(Wstudy,"/Exp2/ZScores/ZScores_Interaction.csv") #Args[3]= "../Exp3/ZScores/ZScores_Interaction.csv" #paste0(Wstudy,"/Exp1/ZScores/ZScores_Interaction.csv") #Args[4]= "../Exp4/ZScores/ZScores_Interaction.csv" #paste0(Wstudy,"/Exp2/ZScores/ZScores_Interaction.csv") #Args[3]= "../Exp3/ZScores/ZScores_Interaction.csv" #paste0(Wstudy,"/Exp3/ZScores/ZScores_Interaction.csv") inputFile= 'asdf' if (file.exists('../Exp1/ZScores/ZScores_Interaction.csv')){ inputFile <- '../Exp1/ZScores/ZScores_Interaction.csv' } if (file.exists('../Exp2/ZScores/ZScores_Interaction.csv')){ inputFile[2] <- '../Exp2/ZScores/ZScores_Interaction.csv' } if (file.exists('../Exp3/ZScores/ZScores_Interaction.csv')){ inputFile[3] <- '../Exp3/ZScores/ZScores_Interaction.csv' } if (file.exists('../Exp4/ZScores/ZScores_Interaction.csv')){ inputFile[4] <- '../Exp4/ZScores/ZScores_Interaction.csv' } #Args= inputFile #outDir <- ArgsJoin[1] #Output Directory print(length(inputFile)) #display the number of arguments on terminal #open required library for the join function (libraries must already be install on R) library(plyr) library(sos) library(dplyr) #read in the files for your experiment and #join the two files at a time as a function of how many inputFile, list the larger file first ? in this example X2 has the larger number of genes. #if X1 has a larger number of genes, switch the order of X1 and X2 if(length(inputFile)==2) { X1 <- read.csv(file= inputFile[1],stringsAsFactors = FALSE) X2 <- read.csv(file= inputFile[2],stringsAsFactors = FALSE) X <- join(X1,X2,by="OrfRep") OBH=X[,order(colnames(X))] #OrderByHeader headSel= select(OBH, contains('OrfRep'), matches('Gene'), contains('Z_lm_K'), contains('Z_Shift_K'),contains('Z_lm_L'), contains('Z_Shift_L')) headSel= select(headSel, -'Gene.1') #remove 'Gene.1 column headSel2 = select(OBH, contains('OrfRep'), matches('Gene')) #Frame for interleaving Z_lm with Shift colums headSel2 = select(headSel2, -'Gene.1') #remove 'Gene.1 column #Frame for interleaving Z_lm with Shift colums }else if(length(inputFile)==3){ X1 <- read.csv(file= inputFile[1],stringsAsFactors = FALSE) #exp1File,stringsAsFactors = FALSE) X2 <- read.csv(file= inputFile[2],stringsAsFactors = FALSE) #exp2File,stringsAsFactors = FALSE) X3 <- read.csv(file= inputFile[3],stringsAsFactors = FALSE) #exp3File,stringsAsFactors = FALSE) X <- join(X1,X2,by="OrfRep") X <- join(X,X3,by="OrfRep") OBH=X[,order(colnames(X))] #OrderByHeader headSel= select(OBH, contains('OrfRep'), matches('Gene'), contains('Z_lm_K'), contains('Z_Shift_K'),contains('Z_lm_L'), contains('Z_Shift_L')) headSel= select(headSel, -'Gene.1',-'Gene.2') headSel2 = select(OBH, contains('OrfRep'), matches('Gene')) headSel2 = select(headSel2, -'Gene.1',-'Gene.2') }else if(length(inputFile)==4){ X1 <- read.csv(file= inputFile[1],stringsAsFactors = FALSE) #exp1File,stringsAsFactors = FALSE) X2 <- read.csv(file= inputFile[2],stringsAsFactors = FALSE) #exp2File,stringsAsFactors = FALSE) X3 <- read.csv(file= inputFile[3],stringsAsFactors = FALSE) #exp3File,stringsAsFactors = FALSE) X4 <- read.csv(file= inputFile[4],stringsAsFactors = FALSE) #exp4File,stringsAsFactors = FALSE) X <- join(X1,X2,by="OrfRep") X <- join(X,X3,by="OrfRep") X <- join(X,X4,by="OrfRep") OBH=X[,order(colnames(X))] #OrderByHeader headSel= select(OBH, contains('OrfRep'), matches('Gene'), contains('Z_lm_K'), contains('Z_Shift_K'),contains('Z_lm_L'), contains('Z_Shift_L')) headSel= select(headSel, -'Gene.1',-'Gene.2',-'Gene.3') headSel2 = select(OBH, contains('OrfRep'), matches('Gene')) headSel2 = select(headSel2, -'Gene.1',-'Gene.2',-'Gene.3') } #headSel$contains('Z_Shift') %>% replace_na(0.001) headers<-colnames(headSel) i=0 for(i in 1:length(headers)){ if(grepl("Shift",headers[i])) { headSel[headers[i]][is.na(headSel[headers[i]])] = 0.001 } if(grepl("Z_lm_",headers[i])) { headSel[headers[i]][is.na(headSel[headers[i]])] = 0.0001 } } #2SD option code to exclude Z_lm values less than 2 standard Deviations #+++++++++++++ REMcRdy= select(headSel, contains('OrfRep'), matches('Gene'), contains('Z_lm_')) shiftOnly= select(headSel, contains('OrfRep'), matches('Gene'), contains('Z_Shift')) #Code to replace the numeric (.1 .2 .3) headers with experiment names from StudyInfo.txt Labels <- read.csv(file= "../Code/StudyInfo.csv",stringsAsFactors = FALSE,sep= ",") #Using Text search grepl to relabel headers+++++++++++++++++++++++++++++++++++++++++ REMcRdyHdr= colnames(REMcRdy) REMcRdyLabels= 'asdf' shftHdr= colnames(shiftOnly) shiftLabels='asdf' shiftLabels[1:2]<-shftHdr[1:2] REMcRdyLabels[1:2]<-REMcRdyHdr[1:2] for(i in 3:(length(shftHdr))){ if(i==3){ shiftLabels[3]<-paste0(Labels[1,2],".",shftHdr[3]) REMcRdyLabels[3]<-paste0(Labels[1,2],".",REMcRdyHdr[3]) } if(i==5){ shiftLabels[5]<-paste0(Labels[1,2],".",shftHdr[5]) REMcRdyLabels[5]<-paste0(Labels[1,2],".",REMcRdyHdr[5]) } if(i==7){ shiftLabels[7]<-paste0(Labels[1,2],".",shftHdr[7]) REMcRdyLabels[7]<-paste0(Labels[1,2],".",REMcRdyHdr[7]) } if(grepl(".1",shftHdr[i],fixed=true)){ shiftLabels[i]<-paste0(Labels[2,2],".",shftHdr[i]) REMcRdyLabels[i]<-paste0(Labels[2,2],".",REMcRdyHdr[i])} if (grepl(".2",shftHdr[i],fixed=true)){ shiftLabels[i]<-paste0(Labels[3,2],".",shftHdr[i]) REMcRdyLabels[i]<-paste0(Labels[3,2],".",REMcRdyHdr[i])} if(grepl(".3",shftHdr[i],fixed=true)){ shiftLabels[i]<-paste0(Labels[4,2],".",shftHdr[i]) REMcRdyLabels[i]<-paste0(Labels[4,2],".",REMcRdyHdr[i])} } for(i in 3:(length(REMcRdyLabels))){ j=as.integer(i) REMcRdyLabels[j]<- gsub("[.]", "_", REMcRdyLabels[j]) shiftLabels[j]<- gsub("[.]", "_", shiftLabels[j]) } colnames(shiftOnly)<- shiftLabels colnames(REMcRdy)<- REMcRdyLabels #+++++++++++++++++++++++ combI= headSel2 #Starting Template orf, Genename columns #headersRemc<-colnames(REMcRdy) #Reoder columns to produce an interleaved set of Z_lm and Shift data for all the cpps. for(i in 3:length(colnames(REMcRdy))){ combI=cbind.data.frame(combI, shiftOnly[i]) combI=cbind.data.frame(combI, REMcRdy[i]) } #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Vec1= NA Vec2= NA Vec3= NA Vec4= NA Vec5= NA Vec6= NA Vec7= NA Vec8= NA if(length(REMcRdy)== 6){ Vec1= abs(REMcRdy[,3])>=std Vec2= abs(REMcRdy[,4])>=std Vec3= abs(REMcRdy[,5])>=std Vec4= abs(REMcRdy[,6])>=std bolVec= Vec1 | Vec2 |Vec3 |Vec4 REMcRdyGT2= REMcRdy[bolVec,1:2] REMcRdyGT2[ ,3:6]= REMcRdy[bolVec,3:6] shiftOnlyGT2= shiftOnly[bolVec,1:2] shiftOnlyGT2[ ,3:6]= shiftOnly[bolVec,3:6] } if(length(REMcRdy)== 8){ Vec1= abs(REMcRdy[,3])>=std Vec2= abs(REMcRdy[,4])>=std Vec3= abs(REMcRdy[,5])>=std Vec4= abs(REMcRdy[,6])>=std Vec5= abs(REMcRdy[,7])>=std Vec6= abs(REMcRdy[,8])>=std bolVec= Vec1 | Vec2 |Vec3 | Vec4 |Vec5 |Vec6 REMcRdyGT2= REMcRdy[bolVec,1:2] REMcRdyGT2[ ,3:8]= REMcRdy[bolVec,3:8] shiftOnlyGT2= shiftOnly[bolVec,1:2] shiftOnlyGT2[ ,3:8]= shiftOnly[bolVec,3:8] } if(length(REMcRdy)== 10){ Vec1= abs(REMcRdy[,3])>=std Vec2= abs(REMcRdy[,4])>=std Vec3= abs(REMcRdy[,5])>=std Vec4= abs(REMcRdy[,6])>=std Vec5= abs(REMcRdy[,7])>=std Vec6= abs(REMcRdy[,8])>=std Vec7= abs(REMcRdy[,9])>=std Vec8= abs(REMcRdy[,10])>=std bolVec= Vec1 | Vec2 |Vec3 |Vec4|Vec5|Vec6|Vec7|Vec8 REMcRdyGT2= REMcRdy[bolVec,1:2] REMcRdyGT2[ ,3:10]= REMcRdy[bolVec,3:10] shiftOnlyGT2= shiftOnly[bolVec,1:2] shiftOnlyGT2[ ,3:10]= shiftOnly[bolVec,3:10] } if(std!=0){ REMcRdy= REMcRdyGT2 #[,2:length(REMcRdyGT2)] shiftOnly= shiftOnlyGT2 #[,2:length(shiftOnlyGT2)] } if(std==0){ REMcRdy= REMcRdy #[,2:length(REMcRdy)] shiftOnly= shiftOnly #[,2:length(shiftOnly)] } #R places hidden "" around the header names. The following # is intended to remove those quote so that the "" donot blow up the Java REMc. #Use ,quote=F in the write.csv statement to fix R output file. print(paste("SD=",std)) print(getwd()) #write.csv(combI,file = file.path(outDir,"CombinedKLzscores.csv"),row.names = FALSE) write.csv(REMcRdy,file = file.path(outDir,"REMcRdy_lm_only.csv"),row.names = FALSE, quote=F) write.csv(shiftOnly,file = file.path(outDir,"Shift_only.csv"),row.names = FALSE, quote=F) #LabelStd <- read.table(file= "./Parameters.csv",stringsAsFactors = FALSE,sep= ",") pwd=getwd() print(getwd) LabelStd<- read.csv(file= "../Code/StudyInfo.csv",stringsAsFactors = FALSE) print(std) LabelStd[,4]= as.numeric(std) write.csv(LabelStd,file="../Code/Parameters.csv",row.names = FALSE) write.csv(LabelStd,file="../Code/StudyInfo.csv",row.names = FALSE) cat(paste("Standard Deviation Value was set as", std, "\n"))