513 líneas
18 KiB
R
513 líneas
18 KiB
R
if(i==3){
|
|
shiftLabels[3]<-paste0(Labels[1,2],".",shftHdr[3])
|
|
REMcRdyLabels[3]<-paste0(Labels[1,2],".",REMcRdyHdr[3]) }
|
|
if(i==5){
|
|
shiftLabels[5]<-paste0(Labels[1,2],".",shftHdr[5])
|
|
REMcRdyLabels[5]<-paste0(Labels[1,2],".",REMcRdyHdr[5])
|
|
}
|
|
if(i==7){
|
|
shiftLabels[7]<-paste0(Labels[1,2],".",shftHdr[7])
|
|
REMcRdyLabels[7]<-paste0(Labels[1,2],".",REMcRdyHdr[7])
|
|
}
|
|
if(grepl(".1",shftHdr[i],fixed=true)){
|
|
shiftLabels[i]<-paste0(Labels[2,2],".",shftHdr[i])
|
|
REMcRdyLabels[i]<-paste0(Labels[2,2],".",REMcRdyHdr[i])}
|
|
if (grepl(".2",shftHdr[i],fixed=true)){
|
|
shiftLabels[i]<-paste0(Labels[3,2],".",shftHdr[i])
|
|
REMcRdyLabels[i]<-paste0(Labels[3,2],".",REMcRdyHdr[i])}
|
|
if(grepl(".3",shftHdr[i],fixed=true)){
|
|
shiftLabels[i]<-paste0(Labels[4,2],".",shftHdr[i])
|
|
REMcRdyLabels[i]<-paste0(Labels[4,2],".",REMcRdyHdr[i])}
|
|
}
|
|
colnames(shiftOnly)<- shiftLabels
|
|
colnames(REMcRdy)<- REMcRdyLabels
|
|
View(shiftOnlyGT2)
|
|
View(shiftOnlyGT2)
|
|
REMcRdyLabels
|
|
REMcRdyLabels[3]
|
|
gsub("[.]", " ", REMcRdyLabels[4])
|
|
gsub("[.]", "_", REMcRdyLabels[4])
|
|
gsub("[.]", "-", REMcRdyLabels[4])
|
|
gsub("[.]", "_", REMcRdyLabels[4])
|
|
View(X)
|
|
View(X)
|
|
View(X4)
|
|
View(X4)
|
|
# Replace "." with "_" in every label from position 3 onward (the first two
# labels are left untouched), then install the labels as column names.
# gsub() returns the modified strings and does not change its argument, so the
# result has to be assigned back; the original loop discarded it.
# seq_along(x)[-(1:2)] is empty when there are fewer than three headers,
# whereas 3:length(x) would count down (3, 2, ...).
labelIdx <- seq_along(shftHdr)[-(1:2)]
REMcRdyLabels[labelIdx] <- gsub("[.]", "_", REMcRdyLabels[labelIdx])
shiftLabels[labelIdx] <- gsub("[.]", "_", shiftLabels[labelIdx])

colnames(shiftOnly) <- shiftLabels
colnames(REMcRdy) <- REMcRdyLabels
|
|
#+++++++++++++++++++++++
|
|
# Build combI: start from the template columns in headSel2 (ORF and gene name
# columns, per the original note) and append, for every data column from
# position 3 onward, the shiftOnly column followed by the REMcRdy column at
# the same position, so the two tables end up interleaved.
combI <- headSel2
#headersRemc<-colnames(REMcRdy)
# seq_len(n)[-(1:2)] is empty when REMcRdy has fewer than three columns;
# 3:n would count down instead.
for (i in seq_len(length(colnames(REMcRdy)))[-(1:2)]) {
  combI <- cbind.data.frame(combI, shiftOnly[i], REMcRdy[i])
}
|
|
# Keep only the rows whose values reach the `std` threshold.
#
# Vec1..Vec8 flag, for data columns 3..10 of REMcRdy, the rows where
# abs(value) >= std; a flag stays NA when its column is not evaluated.
# NOTE(review): the row mask mirrors the original script exactly: it ORs only
# the first two flags for a 6-column table and the first three for an 8-column
# table, but all eight for a 10-column table. Confirm that asymmetry is
# intended.
Vec1 <- Vec2 <- Vec3 <- Vec4 <- Vec5 <- Vec6 <- Vec7 <- Vec8 <- NA
nCols <- length(REMcRdy)

if (nCols %in% c(6, 8, 10)) {
  Vec1 <- abs(REMcRdy[, 3]) >= std
  Vec2 <- abs(REMcRdy[, 4]) >= std
  Vec3 <- abs(REMcRdy[, 5]) >= std
  Vec4 <- abs(REMcRdy[, 6]) >= std
  if (nCols >= 8) {
    Vec5 <- abs(REMcRdy[, 7]) >= std
    Vec6 <- abs(REMcRdy[, 8]) >= std
  }
  if (nCols == 10) {
    Vec7 <- abs(REMcRdy[, 9]) >= std
    Vec8 <- abs(REMcRdy[, 10]) >= std
  }
  bolVec <- switch(
    as.character(nCols),
    "6" = Vec1 | Vec2,
    "8" = Vec1 | Vec2 | Vec3,
    "10" = Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6 | Vec7 | Vec8
  )
  # Select rows and all columns in one step instead of copying columns 1:2 and
  # then assigning the remaining columns into the subset.
  REMcRdyGT2 <- REMcRdy[bolVec, seq_len(nCols)]
  shiftOnlyGT2 <- shiftOnly[bolVec, seq_len(nCols)]
} else if (std != 0) {
  # Without this guard the assignment below would use an undefined (or stale)
  # REMcRdyGT2 / shiftOnlyGT2.
  stop("REMcRdy has ", nCols, " columns; the std filter supports 6, 8 or 10",
       call. = FALSE)
}

# A threshold of 0 means "no filtering": the tables are left as they are.
if (std != 0) {
  REMcRdy <- REMcRdyGT2 #[,2:length(REMcRdyGT2)]
  shiftOnly <- shiftOnlyGT2 #[,2:length(shiftOnlyGT2)]
}
|
|
# Report the threshold and working directory, then write the two result tables
# into outDir.
print(paste("SD=",std))
print(getwd())
#write.csv(combI,file = file.path(outDir,"CombinedKLzscores.csv"),row.names = FALSE)
write.csv(REMcRdy, file = file.path(outDir, "REMcRdy_lm_only.csv"), row.names = FALSE)
write.csv(shiftOnly, file = file.path(outDir, "Shift_only.csv"), row.names = FALSE)

#LabelStd <- read.table(file= "./Parameters.csv",stringsAsFactors = FALSE,sep= ",")
pwd <- getwd()
# The original printed `getwd` without parentheses, which prints the function
# object rather than the directory.
print(pwd)

# Store the threshold in column 4 of every row of StudyInfo.csv (read from the
# working directory) and write the table back under both file names.
LabelStd <- read.csv(file = "StudyInfo.csv", stringsAsFactors = FALSE)
print(std)
LabelStd[, 4] <- as.numeric(std)
write.csv(LabelStd, file = "Parameters.csv", row.names = FALSE)
write.csv(LabelStd, file = "StudyInfo.csv", row.names = FALSE)
|
|
View(REMcRdy)
|
|
View(REMcRdy)
|
|
View(shiftOnly)
|
|
View(shiftOnly)
|
|
View(shiftOnlyGT2)
|
|
View(shiftOnlyGT2)
|
|
View(REMcRdyGT2)
|
|
View(REMcRdyGT2)
|
|
View(shiftOnly)
|
|
REMcRdy
|
|
colnames(shiftOnly)<- shiftLabels
|
|
View(shiftOnly)
|
|
# Replace "." with "_" in the labels from position 3 onward.  gsub() returns a
# new vector, so its result is assigned back (the original loop discarded it).
# The index is empty, not a countdown, when there are fewer than three headers.
labelIdx <- seq_along(shftHdr)[-(1:2)]
REMcRdyLabels[labelIdx] <- gsub("[.]", "_", REMcRdyLabels[labelIdx])
shiftLabels[labelIdx] <- gsub("[.]", "_", shiftLabels[labelIdx])
|
|
REMcRdyLabels[4]
|
|
gsub("[.]", "_", REMcRdyLabels[4])
|
|
# Replace "." with "_" in the labels from position 3 onward.  The original
# loop called gsub() without storing the result, so nothing changed; converting
# the index with as.numeric() has no bearing on that.
labelIdx <- seq_along(shftHdr)[-(1:2)]
REMcRdyLabels[labelIdx] <- gsub("[.]", "_", REMcRdyLabels[labelIdx])
shiftLabels[labelIdx] <- gsub("[.]", "_", shiftLabels[labelIdx])
|
|
colnames(shiftOnly)<- shiftLabels
|
|
REMcRdyLabels
|
|
as.integer(5)
|
|
# Replace "." with "_" in the labels from position 3 onward.  The original
# loop called gsub() without storing the result, so nothing changed; converting
# the index with as.integer() has no bearing on that.
labelIdx <- seq_along(shftHdr)[-(1:2)]
REMcRdyLabels[labelIdx] <- gsub("[.]", "_", REMcRdyLabels[labelIdx])
shiftLabels[labelIdx] <- gsub("[.]", "_", shiftLabels[labelIdx])
|
|
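# Note: gsub() returns a new string and does not modify its argument, so its result must be assigned back to the label vector; wrapping the loop index in as.numeric() or as.integer() makes no difference.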
|
|
# Replace "." with "_" in the labels from position 3 onward (positions taken
# from REMcRdyLabels).  The gsub() result is assigned back; the original loop
# discarded it.  The index is empty when there are fewer than three labels.
labelIdx <- seq_along(REMcRdyLabels)[-(1:2)]
REMcRdyLabels[labelIdx] <- gsub("[.]", "_", REMcRdyLabels[labelIdx])
shiftLabels[labelIdx] <- gsub("[.]", "_", shiftLabels[labelIdx])
|
|
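# Note: gsub() returns a new string and does not modify its argument, so its result must be assigned back to the label vector; wrapping the loop index in as.numeric() or as.integer() makes no difference.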
|
|
# Replace "." with "_" in the labels from position 3 onward.
# The original loop assigned the single converted element to the WHOLE vector
# (REMcRdyLabels = gsub(..., REMcRdyLabels[j])), collapsing each label vector
# to one string on the first pass.  Assign into the same positions instead.
labelIdx <- seq_along(REMcRdyLabels)[-(1:2)]
REMcRdyLabels[labelIdx] <- gsub("[.]", "_", REMcRdyLabels[labelIdx])
shiftLabels[labelIdx] <- gsub("[.]", "_", shiftLabels[labelIdx])
|
|
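# Note: gsub() returns a new string and does not modify its argument, so its result must be assigned back to the label vector; wrapping the loop index in as.numeric() or as.integer() makes no difference.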
|
|
# Replace "." with "_" in the labels from position 3 onward, in one vectorised
# step.  seq_along(x)[-(1:2)] is empty when there are fewer than three labels,
# whereas 3:length(x) would count down and write NA-derived entries.
labelIdx <- seq_along(REMcRdyLabels)[-(1:2)]
REMcRdyLabels[labelIdx] <- gsub("[.]", "_", REMcRdyLabels[labelIdx])
shiftLabels[labelIdx] <- gsub("[.]", "_", shiftLabels[labelIdx])
|
|
# Prefix each header from position 3 onward with a study label taken from
# column 2 of Labels.  The label row is chosen as in the original chain of
# ifs, where a later rule overrides an earlier one:
#   position 3, 5 or 7      -> Labels[1, 2]
#   header contains ".1"    -> Labels[2, 2]
#   header contains ".2"    -> Labels[3, 2]
#   header contains ".3"    -> Labels[4, 2]
# Headers matching no rule are left as they are.
# The original passed fixed=true; R's logical constant is TRUE, and `true` is
# an undefined object, so every grepl() call stopped with an error.
for (i in seq_along(shftHdr)[-(1:2)]) {
  labelRow <- NA
  if (i %in% c(3, 5, 7)) labelRow <- 1
  if (grepl(".1", shftHdr[i], fixed = TRUE)) labelRow <- 2
  if (grepl(".2", shftHdr[i], fixed = TRUE)) labelRow <- 3
  if (grepl(".3", shftHdr[i], fixed = TRUE)) labelRow <- 4
  if (!is.na(labelRow)) {
    shiftLabels[i] <- paste0(Labels[labelRow, 2], ".", shftHdr[i])
    REMcRdyLabels[i] <- paste0(Labels[labelRow, 2], ".", REMcRdyHdr[i])
  }
}
|
|
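# Note: gsub() returns a new string and does not modify its argument, so its result must be assigned back to the label vector; wrapping the loop index in as.numeric() or as.integer() makes no difference.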
|
|
# Replace "." with "_" in the labels from position 3 onward, in one vectorised
# step.  seq_along(x)[-(1:2)] is empty when there are fewer than three labels,
# whereas 3:length(x) would count down and write NA-derived entries.
labelIdx <- seq_along(REMcRdyLabels)[-(1:2)]
REMcRdyLabels[labelIdx] <- gsub("[.]", "_", REMcRdyLabels[labelIdx])
shiftLabels[labelIdx] <- gsub("[.]", "_", shiftLabels[labelIdx])
|
|
length(REMcRdyLabels)
|
|
# Prefix each header from position 3 onward with a study label taken from
# column 2 of Labels.  The label row is chosen as in the original chain of
# ifs, where a later rule overrides an earlier one:
#   position 3, 5 or 7      -> Labels[1, 2]
#   header contains ".1"    -> Labels[2, 2]
#   header contains ".2"    -> Labels[3, 2]
#   header contains ".3"    -> Labels[4, 2]
# Headers matching no rule are left as they are.
# The original passed fixed=true; R's logical constant is TRUE, and `true` is
# an undefined object, so every grepl() call stopped with an error.
for (i in seq_along(shftHdr)[-(1:2)]) {
  labelRow <- NA
  if (i %in% c(3, 5, 7)) labelRow <- 1
  if (grepl(".1", shftHdr[i], fixed = TRUE)) labelRow <- 2
  if (grepl(".2", shftHdr[i], fixed = TRUE)) labelRow <- 3
  if (grepl(".3", shftHdr[i], fixed = TRUE)) labelRow <- 4
  if (!is.na(labelRow)) {
    shiftLabels[i] <- paste0(Labels[labelRow, 2], ".", shftHdr[i])
    REMcRdyLabels[i] <- paste0(Labels[labelRow, 2], ".", REMcRdyHdr[i])
  }
}
|
|
# The loop index arithmetic above was not at fault (the failing calls used `true` instead of TRUE and discarded gsub() results); the headers are relabelled below with a text-based search instead.
|
|
#Do it with text search and replace or modify.
|
|
#R is just a badly designed or rather badly evolved mutant language!!! Full of problems and inconsistencies!!!
|
|
#R causes huge waste of time.
|
|
#R is not worth fixing. It needs to be discontinued both in use and development before it does more harm.
|
|
#Using Text search grepl to relabel headers+++++++++++++++++++++++++++++++++++++++++
|
|
# Capture the current column names of both tables and start each label vector
# with the first two headers unchanged; labels from position 3 onward are
# filled in afterwards.  (The original seeded the vectors with the placeholder
# string 'asdf' and then overwrote elements 1:2, which gives the same result.)
REMcRdyHdr <- colnames(REMcRdy)
shftHdr <- colnames(shiftOnly)
REMcRdyLabels <- REMcRdyHdr[1:2]
shiftLabels <- shftHdr[1:2]
|
|
# Prefix each header from position 3 onward with a study label taken from
# column 2 of Labels.  The label row is chosen as in the original chain of
# ifs, where a later rule overrides an earlier one:
#   position 3, 5 or 7      -> Labels[1, 2]
#   header contains ".1"    -> Labels[2, 2]
#   header contains ".2"    -> Labels[3, 2]
#   header contains ".3"    -> Labels[4, 2]
# Headers matching no rule get no label assigned.
# The original passed fixed=true; R's logical constant is TRUE, and `true` is
# an undefined object, so every grepl() call stopped with an error.
for (i in seq_along(shftHdr)[-(1:2)]) {
  labelRow <- NA
  if (i %in% c(3, 5, 7)) labelRow <- 1
  if (grepl(".1", shftHdr[i], fixed = TRUE)) labelRow <- 2
  if (grepl(".2", shftHdr[i], fixed = TRUE)) labelRow <- 3
  if (grepl(".3", shftHdr[i], fixed = TRUE)) labelRow <- 4
  if (!is.na(labelRow)) {
    shiftLabels[i] <- paste0(Labels[labelRow, 2], ".", shftHdr[i])
    REMcRdyLabels[i] <- paste0(Labels[labelRow, 2], ".", REMcRdyHdr[i])
  }
}
|
|
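# Note: gsub() returns a new string and does not modify its argument, so its result must be assigned back to the label vector; wrapping the loop index in as.numeric() or as.integer() makes no difference.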
|
|
# Replace "." with "_" in the labels from position 3 onward, then install the
# labels as the column names of both tables.
# seq_along(x)[-(1:2)] is empty when there are fewer than three labels,
# whereas 3:length(x) would count down and write NA-derived entries.
labelIdx <- seq_along(REMcRdyLabels)[-(1:2)]
REMcRdyLabels[labelIdx] <- gsub("[.]", "_", REMcRdyLabels[labelIdx])
shiftLabels[labelIdx] <- gsub("[.]", "_", shiftLabels[labelIdx])

colnames(shiftOnly) <- shiftLabels
colnames(REMcRdy) <- REMcRdyLabels
|
|
#+++++++++++++++++++++++
|
|
# Build combI: start from the template columns in headSel2 (ORF and gene name
# columns, per the original note) and append, for every data column from
# position 3 onward, the shiftOnly column followed by the REMcRdy column at
# the same position, so the two tables end up interleaved.
combI <- headSel2
#headersRemc<-colnames(REMcRdy)
# seq_len(n)[-(1:2)] is empty when REMcRdy has fewer than three columns;
# 3:n would count down instead.
for (i in seq_len(length(colnames(REMcRdy)))[-(1:2)]) {
  combI <- cbind.data.frame(combI, shiftOnly[i], REMcRdy[i])
}
|
|
# Keep only the rows whose values reach the `std` threshold.
#
# Vec1..Vec8 flag, for data columns 3..10 of REMcRdy, the rows where
# abs(value) >= std; a flag stays NA when its column is not evaluated.
# NOTE(review): the row mask mirrors the original script exactly: it ORs only
# the first two flags for a 6-column table and the first three for an 8-column
# table, but all eight for a 10-column table. Confirm that asymmetry is
# intended.
Vec1 <- Vec2 <- Vec3 <- Vec4 <- Vec5 <- Vec6 <- Vec7 <- Vec8 <- NA
nCols <- length(REMcRdy)

if (nCols %in% c(6, 8, 10)) {
  Vec1 <- abs(REMcRdy[, 3]) >= std
  Vec2 <- abs(REMcRdy[, 4]) >= std
  Vec3 <- abs(REMcRdy[, 5]) >= std
  Vec4 <- abs(REMcRdy[, 6]) >= std
  if (nCols >= 8) {
    Vec5 <- abs(REMcRdy[, 7]) >= std
    Vec6 <- abs(REMcRdy[, 8]) >= std
  }
  if (nCols == 10) {
    Vec7 <- abs(REMcRdy[, 9]) >= std
    Vec8 <- abs(REMcRdy[, 10]) >= std
  }
  bolVec <- switch(
    as.character(nCols),
    "6" = Vec1 | Vec2,
    "8" = Vec1 | Vec2 | Vec3,
    "10" = Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6 | Vec7 | Vec8
  )
  # Select rows and all columns in one step instead of copying columns 1:2 and
  # then assigning the remaining columns into the subset.
  REMcRdyGT2 <- REMcRdy[bolVec, seq_len(nCols)]
  shiftOnlyGT2 <- shiftOnly[bolVec, seq_len(nCols)]
} else if (std != 0) {
  # Without this guard the assignment below would use an undefined (or stale)
  # REMcRdyGT2 / shiftOnlyGT2.
  stop("REMcRdy has ", nCols, " columns; the std filter supports 6, 8 or 10",
       call. = FALSE)
}

# A threshold of 0 means "no filtering": the tables are left as they are.
if (std != 0) {
  REMcRdy <- REMcRdyGT2 #[,2:length(REMcRdyGT2)]
  shiftOnly <- shiftOnlyGT2 #[,2:length(shiftOnlyGT2)]
}
|
|
# Report the threshold and working directory, then write the two result tables
# into outDir.
print(paste("SD=",std))
print(getwd())
#write.csv(combI,file = file.path(outDir,"CombinedKLzscores.csv"),row.names = FALSE)
write.csv(REMcRdy, file = file.path(outDir, "REMcRdy_lm_only.csv"), row.names = FALSE)
write.csv(shiftOnly, file = file.path(outDir, "Shift_only.csv"), row.names = FALSE)

#LabelStd <- read.table(file= "./Parameters.csv",stringsAsFactors = FALSE,sep= ",")
pwd <- getwd()
# The original printed `getwd` without parentheses, which prints the function
# object rather than the directory.
print(pwd)

# Store the threshold in column 4 of every row of StudyInfo.csv (read from the
# working directory) and write the table back under both file names.
LabelStd <- read.csv(file = "StudyInfo.csv", stringsAsFactors = FALSE)
print(std)
LabelStd[, 4] <- as.numeric(std)
write.csv(LabelStd, file = "Parameters.csv", row.names = FALSE)
write.csv(LabelStd, file = "StudyInfo.csv", row.names = FALSE)
|
|
View(REMcRdy)
|
|
View(REMcRdy)
|
|
setwd("/mnt/data/StudiesQHTCP/ss_YOR1nullRPL12a_deltaF/Exp4")
|
|
genes = data.frame(read.delim(file=SGDgeneList,quote="",header=FALSE,colClasses = c(rep("NULL",3), rep("character", 2), rep("NULL", 11))))
|
|
# Refresh column 15 of X (presumably the gene name -- TODO confirm) from the
# `genes` lookup table, keyed by column 14 of X against genes[, 1].
# Vectorised replacement for the original row-by-row loop, which assigned into
# the data frame once per row and would stop on an NA comparison inside if().
OrfRepColNum <- match("OrfRep", names(X))

# nomatch = 1 sends keys with no match to row 1 of `genes`, as before.
# NOTE(review): this relies on genes[1, 2] being a suitable fallback value
# (e.g. an empty string that the second step then replaces) -- confirm.
line_num <- match(X[, 14], genes[, 1], nomatch = 1)

# Every row except the YDL227C reference rows takes the looked-up name.
# which() drops NA comparisons instead of failing on them.
nonRefRows <- which(X[[OrfRepColNum]] != "YDL227C")
X[[15]][nonRefRows] <- genes[[2]][line_num[nonRefRows]]

# Names that are empty or "OCT1" are replaced by the row's OrfRep value.
useOrfRows <- which(X[[15]] == "" | X[[15]] == "OCT1")
X[[15]][useOrfRows] <- X[[OrfRepColNum]][useOrfRows]
|
|
#Inserted to use SGDgenelist to update orfs and replace empty geneName cells with ORF name (adapted from Sean's Merge script). This is to 'fix' the naming for everything that follows (REMc, Heatmaps ... et.al) rather than do it piece meal later
|
|
#Sean's Match Script( which was adapted here) was fixed 2022_0608 so as not to write over the RF1&RF2 geneNames which caused a variance with his code results
|
|
#in the Z_lm_L,K,r&AUC output values. Values correlated well but were off by a multiplier factor.
|
|
SGDgeneList= "../Code/SGD_features.tab"
|
|
genes = data.frame(read.delim(file=SGDgeneList,quote="",header=FALSE,colClasses = c(rep("NULL",3), rep("character", 2), rep("NULL", 11))))
|
|
View(genes)
|
|
View(genes)
|
|
dlgenes=droplevels(genes)
|
|
View(dlgenes)
|
|
View(genes)
|
|
View(dlgenes)
|
|
#Based on InteractionTemplate.R which is based on Sean Santose's Interaction_V5 script.
|
|
#Adapt SS For Structured Data storage but using command line scripts
|
|
###Set up the required libraries, call required plot theme elements and set up the command line arguments
|
|
library("ggplot2")
|
|
library("plyr")
|
|
library("extrafont")
|
|
library("gridExtra")
|
|
library("gplots")
|
|
library("RColorBrewer")
|
|
library("stringr")
|
|
#library("gdata")
|
|
library(plotly)
|
|
library(htmlwidgets)
|
|
# Command-line arguments (Args[2] is read later as the std factor).
Args <- commandArgs(TRUE)
getwd()

# Input results file.  The name has to be a single-line string literal; the
# original literal was closed several lines further down, so the file name
# ended in embedded line breaks and could not match a file on disk.
input_file <- "!!Results_17_0827_yor1null-rpl12anull misLabeledAsFrom MI 17_0919_yor1-curated.txt"

#Path to Output Directory
W <- getwd() # kept for reference; outDir below is relative to the working directory
outDir <- "ZScores/" #"Args[2] #paste0(W,"/ZScores/")
subDir <- outDir #Args[2]

# Create the output directory and its QC/ sub-directory when they are missing.
if (!file.exists(subDir)) {
  dir.create(file.path(subDir))
}
outputpath_QC <- paste(subDir, "QC/", sep = "")
if (!file.exists(outputpath_QC)) {
  dir.create(file.path(outputpath_QC))
}

#define the output path (formerly the second argument from Rscript)
outputpath <- outDir
|
|
#Set Args[2] the Background contamination noise filter as a function of standard deviation
|
|
#Sean recommends 3 or 5 SD factor.
|
|
#Capture Exp_ number,use it to Save Args[2]{std}to Labels field and then Write to Labels to studyInfo.txt for future reference
|
|
Labels <- read.csv(file = "../Code/StudyInfo.csv", stringsAsFactors = FALSE) #,sep= ",")
print("Be sure to include Argument 2 the Bacground noise filter standard deviation i.e., 3 or 5 per Sean")

# Background-noise filter factor: use Args[2] when it is a valid number and
# fall back to 3 otherwise.  The original stored as.numeric(Args[2]) in Labels
# and then unconditionally reset std to 3, so the recorded value and the value
# actually used could differ (and an absent argument was recorded as NA).
std <- suppressWarnings(as.numeric(Args[2]))
if (length(std) != 1 || is.na(std)) {
  std <- 3
}

# Experiment number = trailing digits of the working directory path; it picks
# the row of Labels whose column 3 receives the factor.
expNumber <- suppressWarnings(as.numeric(sub("^.*?(\\d+)$", "\\1", getwd())))
if (is.na(expNumber)) {
  # An NA row index is not allowed in a data-frame assignment.
  warning("Working directory does not end in an experiment number; ",
          "std not recorded in Labels", call. = FALSE)
} else {
  Labels[expNumber, 3] <- std
}

Delta_Background_sdFactor <- std
DelBGFactr <- as.numeric(Delta_Background_sdFactor)

#Write Background SD value to studyInfo.txt file
write.csv(Labels, file = "../Code/StudyInfo.csv", row.names = FALSE)
print('ln 50 write StudyInfo.csv ')
|
|
#read in the data
|
|
input_file <- "!!Results_17_0827_yor1null-rpl12anull misLabeledAsFrom MI 17_0919_yor1-curated.txt"

#read in the data
# Read once (the history read the file twice, the first time before the file
# name was corrected): skip the first two lines, keep strings as character,
# take row names from the first column and trim surrounding white space.
X <- read.delim(input_file, skip = 2, as.is = TRUE, row.names = 1,
                strip.white = TRUE)

# Drop rows whose first column is empty or repeats the "Scan" header text.
X <- X[!(X[[1]] %in% c("", "Scan")), ]

# Keep the first 46 columns plus the last three.
# NOTE(review): with fewer than 49 columns the two index ranges overlap and
# columns would be duplicated -- confirm the input always has more.
X_length <- length(X[1, ])
X_end <- X_length - 2
X <- X[, c(1:46, X_end:X_length)]
|
|
#use numeric data to perform operations
|
|
# Coerce the columns used in calculations to numeric; entries that cannot be
# converted become NA (with a warning from as.numeric()).
# [[name]] matches column names exactly, whereas `$` on a data frame also
# accepts unique partial matches.
numericCols <- c("Col", "Row", "l", "K", "r", "Scan", "AUC",
                 "LstBackgrd", "X1stBackgrd")
for (colName in numericCols) {
  X[[colName]] <- as.numeric(X[[colName]])
}
|
|
#set the OrfRep to YDL227C for the ref data
# Assign through the column with which(): the original form
# `X[rows, ]$OrfRep <- ...` stops when no row matches (a one-element
# replacement for zero rows) or when ORF contains NA.
X$OrfRep[which(X$ORF == "YDL227C")] <- "YDL227C"

#Sean removes the Doxycyclin at 0.0ug.mL so that only the Oligomycin series with Doxycyclin of 0.12ug/mL are used.
#That is the first DM plates are removed from the data set with the following.
X <- X[X$Conc1 != "0ug/mL",] #This occurs only for Exp1 and Exp2 and so doesn't have any effect on Exp3&4

#get total number of drug concentrations
Total_Conc_Nums <- length(unique(X$Conc))
|
|
#function to ID numbers in string with characters+numbers (ie to get numeric drug conc)
|
|
# Return, for each element of `string`, the first substring that looks like a
# number: any run of "-" characters, one or more digits, any run of dots, then
# any further digits.  The extraction is delegated to str_extract().
numextract <- function(string) {
  number_pattern <- "\\-*\\d+\\.*\\d*"
  str_extract(string, pattern = number_pattern)
}
|
|
#generate a new column with the numeric drug concs
|
|
# Numeric drug concentration extracted from the Conc text.
X$Conc_Num <- as.numeric(numextract(X$Conc))

#Generate new column with the numeric drug concs as factors starting at 0 for the graphing later
X$Conc_Num_Factor <- as.numeric(as.factor(X$Conc_Num)) - 1

#Get the max factor for concentration
MAX_CONC <- max(X$Conc_Num_Factor)

#remove wells with problems for making graphs and to not include in summary statistics
# One %in% mask replaces the four successive `!=` filters.  %in% never returns
# NA, so a row with a missing Gene/ORF is kept as it is instead of being turned
# into an all-NA row by logical indexing.
isBlankWell <- X$Gene %in% c("BLANK", "Blank", "blank") | X$ORF %in% "Blank"
X <- X[!isBlankWell, ]
#X <- X[X$Gene != "HO",]

# Copy of X as it stands at this point.
Xbu <- X
|
|
#Inserted to use SGDgenelist to update orfs and replace empty geneName cells with ORF name (adapted from Sean's Merge script). This is to 'fix' the naming for everything that follows (REMc, Heatmaps ... et.al) rather than do it piece meal later
|
|
#Sean's Match Script( which was adapted here) was fixed 2022_0608 so as not to write over the RF1&RF2 geneNames which caused a variance with his code results
|
|
#in the Z_lm_L,K,r&AUC output values. Values correlated well but were off by a multiplier factor.
|
|
SGDgeneList <- "../Code/SGD_features.tab"
# Read only columns 4 and 5 of the 16-column, header-less table as character;
# the "NULL" column classes skip the other columns.
genes <- data.frame(read.delim(file = SGDgeneList, quote = "", header = FALSE,
                               colClasses = c(rep("NULL", 3),
                                              rep("character", 2),
                                              rep("NULL", 11))))

# Spot-check of the lookup for the first row of X.  The console history
# contained fragments starting with "," (syntax errors) and read OrfRepColNum
# before defining it; this is the runnable equivalent.
ii <- 1
OrfRepColNum <- as.numeric(match('OrfRep', names(X)))

# Column 14 of X against the first column of genes.
line_num <- match(X[ii, 14], genes[, 1], nomatch = 1)
print(line_num)
print(X[ii, 14])
print(genes[10381, 1]) # NOTE(review): hard-coded row inspected in the session

# Column 15 of X against the second column of genes.
line_num <- match(X[ii, 15], genes[, 2], nomatch = 1)
print(line_num)
print(X[ii, 15])
print(X[ii, OrfRepColNum])
|
|
# Refresh column 15 of X from the `genes` lookup table.
# NOTE(review): this variant matches column 15 of X against genes[, 2] and
# writes genes[line_num, 2] back, so matched values are rewritten unchanged and
# unmatched values receive the entry of row 1 (nomatch = 1).  Another copy of
# this loop in the file keys on X[, 14] against genes[, 1] -- confirm which
# lookup is intended.
# Vectorised replacement for the original row-by-row loop, which assigned into
# the data frame once per row and would stop on an NA comparison inside if().
OrfRepColNum <- match("OrfRep", names(X))
line_num <- match(X[, 15], genes[, 2], nomatch = 1)

# Every row except the YDL227C reference rows takes the looked-up name.
# which() drops NA comparisons instead of failing on them.
nonRefRows <- which(X[[OrfRepColNum]] != "YDL227C")
X[[15]][nonRefRows] <- genes[[2]][line_num[nonRefRows]]

# Names that are empty or "OCT1" are replaced by the row's OrfRep value.
useOrfRows <- which(X[[15]] == "" | X[[15]] == "OCT1")
X[[15]][useOrfRows] <- X[[OrfRepColNum]][useOrfRows]
|
|
# Diagnostic searches on X.  The console history for this step consisted of
# calls to functions that do not do this job (contains(), where(), find()),
# statements with unbalanced quotes or parentheses, and patterns ending in
# "\b", which in an R string is a backspace character rather than the regex
# word boundary "\\b".  The runnable equivalents are below.

# Rows whose Gene value contains a literal double quote.
print(which(grepl('"', X$Gene, fixed = TRUE)))

# Row/column positions of cells anywhere in X containing the whole word AAC1
# or RF1.
print(which(array(grepl("\\bAAC1\\b", as.matrix(X)), dim(X)), arr.ind = TRUE))
print(which(array(grepl("\\bRF1\\b", as.matrix(X)), dim(X)), arr.ind = TRUE))

# Rows whose Gene value contains the whole word RF1.
asd <- which(grepl("\\bRF1\\b", X$Gene))
print(asd)
|
|
write.csv(X,file=paste("../Code/CheckRcrap.csv"),row.names = FALSE)
|
|
setwd("/mnt/data/StudiesQHTCP/ss_YOR1nullRPL12a_deltaF/Code")
|