# JoinInteractExps3dev.R
#
# Joins the per-experiment ZScores_Interaction.csv tables on "OrfRep",
# keeps the OrfRep/Gene identifiers plus the Z_lm and Z_Shift columns for the
# K and L parameters, fills missing values, and splits the result into
#   REMcRdy   - identifiers + Z_lm_* columns
#   shiftOnly - identifiers + Z_Shift_* columns
# It also loads the experiment labels (Labels) from ../Code/StudyInfo.csv.

# ---- Standard-deviation threshold (user prompt) ----------------------------
cat("Enter a Standard Deviation value to filter data to be used by REMc ?\n")
stdin_con <- file("stdin")
inp <- readLines(stdin_con, n = 1L)
close(stdin_con) # release the connection instead of leaving it to the GC
cat(paste("Standard Deviation Value is", inp, "\n"))

# readLines() always returns character, so the entry has to be converted
# before it can be used as a number. Fall back to the default multiplier of 2
# when nothing (or nothing numeric) was entered.
std <- suppressWarnings(as.numeric(inp))
if (length(std) != 1L || is.na(std)) {
  std <- 2
}
print(paste("SD=", std))

# ---- Output location -------------------------------------------------------
outDir <- "./"
print(outDir)

# ---- Locate the experiment input files -------------------------------------
# Up to four experiments are supported. The input files should be ordered
# from the greatest number of rows (Orfs) to the least, because each table is
# joined onto the accumulated result of the previous ones.
candidate_files <- file.path(
  "..", paste0("Exp", 1:4), "ZScores", "ZScores_Interaction.csv"
)
files_found <- file.exists(candidate_files)
n_found <- sum(files_found)
# Exp1..ExpN must all be present: later steps map the ".1", ".2", ... column
# suffixes onto StudyInfo rows by position, so gaps would mislabel the data.
if (n_found < 2L || !all(files_found[seq_len(n_found)])) {
  stop(
    "Expected ZScores_Interaction.csv for at least Exp1 and Exp2, with no ",
    "gaps in the experiment numbering; found: ",
    paste(candidate_files[files_found], collapse = ", "),
    call. = FALSE
  )
}
inputFile <- candidate_files[files_found]
print(length(inputFile)) # display the number of input files on the terminal

# ---- Libraries (must already be installed in R) ----------------------------
library(plyr)
library(dplyr)
library(sos)

# ---- Read and join ---------------------------------------------------------
# Join the tables left to right on OrfRep, exactly as the former explicit
# two-, three- and four-file branches did.
exp_tables <- lapply(inputFile, read.csv, stringsAsFactors = FALSE)
X <- Reduce(function(acc, nxt) join(acc, nxt, by = "OrfRep"), exp_tables)

# Order by header. NOTE(review): the ".1", ".2", ... suffixes that the rest of
# the script relies on (e.g. "Gene.1") appear to be produced here, when the
# duplicated column names left by the joins are made unique - confirm.
OBH <- X[, order(colnames(X))]

headSel <- select(
  OBH,
  contains("OrfRep"), matches("Gene"),
  contains("Z_lm_K"), contains("Z_Shift_K"),
  contains("Z_lm_L"), contains("Z_Shift_L")
)
# Frame of identifier columns, used later as the template for interleaving
# the Z_lm and Shift columns.
headSel2 <- select(OBH, contains("OrfRep"), matches("Gene"))

# Drop the redundant Gene columns contributed by the 2nd..Nth tables.
dup_gene_cols <- paste0("Gene.", seq_len(length(inputFile) - 1L))
headSel <- headSel[, !(colnames(headSel) %in% dup_gene_cols), drop = FALSE]
headSel2 <- headSel2[, !(colnames(headSel2) %in% dup_gene_cols), drop = FALSE]

# ---- Fill missing values ---------------------------------------------------
# Replace NA in every column whose name matches `pattern` with `value`.
fill_missing <- function(df, pattern, value) {
  for (col in grep(pattern, colnames(df), value = TRUE)) {
    df[[col]][is.na(df[[col]])] <- value
  }
  df
}
# NOTE(review): 0.001 / 0.0001 look like small non-zero placeholders for
# missing Shift / Z_lm values - confirm the intended meaning downstream.
headSel <- fill_missing(headSel, "Shift", 0.001)
headSel <- fill_missing(headSel, "Z_lm_", 0.0001)

# ---- Split into the REMc input and the shift-only table --------------------
REMcRdy <- select(
  headSel, contains("OrfRep"), matches("Gene"), contains("Z_lm_")
)
shiftOnly <- select(
  headSel, contains("OrfRep"), matches("Gene"), contains("Z_Shift")
)

# Experiment labels used to replace the numeric (.1 .2 .3) header suffixes
# with experiment names.
Labels <- read.csv(
  file = "../Code/StudyInfo.csv", stringsAsFactors = FALSE, sep = ","
)
# ---- Relabel headers with experiment names ---------------------------------
# Columns 1-2 (OrfRep, Gene) keep their names. Every later column is prefixed
# with the experiment name taken from the second column of Labels:
#   no numeric suffix  -> Labels[1, 2]   (first experiment)
#   suffix ".k"        -> Labels[k + 1, 2]
# and every "." in the resulting label is replaced by "_".
#
# The experiment is derived from the header's own suffix rather than from
# hard-coded column positions, so the mapping holds for any number of
# experiments (the positional version left the first experiment's L column
# unlabelled when three experiments were joined).
relabel_headers <- function(hdr, exp_names) {
  relabelled <- hdr
  data_idx <- seq_along(hdr)[-(1:2)]
  if (length(data_idx) > 0L) {
    exp_row <- rep(1L, length(hdr))
    has_suffix <- grepl("[.][0-9]+$", hdr)
    exp_row[has_suffix] <-
      as.integer(sub("^.*[.]([0-9]+)$", "\\1", hdr[has_suffix])) + 1L
    prefixed <- paste0(exp_names[exp_row[data_idx]], ".", hdr[data_idx])
    relabelled[data_idx] <- gsub("[.]", "_", prefixed)
  }
  relabelled
}

REMcRdyHdr <- colnames(REMcRdy)
shftHdr <- colnames(shiftOnly)
REMcRdyLabels <- relabel_headers(REMcRdyHdr, Labels[, 2])
shiftLabels <- relabel_headers(shftHdr, Labels[, 2])

colnames(shiftOnly) <- shiftLabels
colnames(REMcRdy) <- REMcRdyLabels

# ---- Interleaved Shift / Z_lm table ----------------------------------------
# Start from the OrfRep/Gene template (headSel2) and append, for every data
# column position, the Shift column followed by the matching Z_lm column.
# The pieces are collected first and bound once instead of growing combI
# inside a loop.
data_cols <- seq_len(ncol(REMcRdy))[-(1:2)]
interleaved <- unlist(
  lapply(data_cols, function(k) list(shiftOnly[k], REMcRdy[k])),
  recursive = FALSE
)
combI <- do.call(cbind.data.frame, c(list(headSel2), interleaved))
# ---- Keep rows with at least one |Z_lm| >= std -----------------------------
# A row (Orf) is retained when the absolute value of ANY of its Z_lm columns
# (columns 3 onward of REMcRdy: K and L for every experiment) reaches the
# standard-deviation threshold. The per-column tests are OR-ed into a single
# logical vector, bolVec, which is then applied to both REMcRdy and shiftOnly
# so the two tables stay row-aligned.
#
# Columns are addressed by position with [[k]], which accepts a computed
# index, so no per-column variables or fixed column counts are needed.
std_threshold <- as.numeric(std)
z_cols <- seq_len(ncol(REMcRdy))[-(1:2)]
exceeds <- lapply(z_cols, function(k) abs(REMcRdy[[k]]) >= std_threshold)
bolVec <- Reduce(`|`, exceeds, rep(FALSE, nrow(REMcRdy)))
# A row whose comparisons are all NA cannot be shown to pass the threshold;
# treat it as not selected rather than emitting an all-NA row.
bolVec[is.na(bolVec)] <- FALSE

REMcRdyGT2 <- REMcRdy[bolVec, , drop = FALSE]
shiftOnlyGT2 <- shiftOnly[bolVec, , drop = FALSE]

# A threshold of 0 means "no filtering": the tables are left untouched.
if (std_threshold != 0) {
  REMcRdy <- REMcRdyGT2
  shiftOnly <- shiftOnlyGT2
}

# write.csv() quotes header names by default; the output files are written
# with quote = FALSE so the quotes do not break the Java REMc program.
# ---- Write results ---------------------------------------------------------
print(paste("SD=", std))
print(getwd())

# write.csv(combI, file = file.path(outDir, "CombinedKLzscores.csv"),
#           row.names = FALSE)

# quote = FALSE keeps header names and values unquoted in the output files
# consumed by REMc.
write.csv(
  REMcRdy,
  file = file.path(outDir, "REMcRdy_lm_only.csv"),
  row.names = FALSE, quote = FALSE
)
write.csv(
  shiftOnly,
  file = file.path(outDir, "Shift_only.csv"),
  row.names = FALSE, quote = FALSE
)

# Show the working directory the relative "../Code" paths resolve against.
# (getwd must be called; printing the bare name prints the function itself.)
pwd <- getwd()
print(pwd)

# Record the threshold that was used in the 4th column of the study info
# table, then save it both as Parameters.csv and back over StudyInfo.csv.
LabelStd <- read.csv(file = "../Code/StudyInfo.csv", stringsAsFactors = FALSE)
print(std)
LabelStd[, 4] <- as.numeric(std)
write.csv(LabelStd, file = "../Code/Parameters.csv", row.names = FALSE)
write.csv(LabelStd, file = "../Code/StudyInfo.csv", row.names = FALSE)

cat(paste("Standard Deviation Value was set as", std, "\n"))