Files
hartman-server/workflow/templates/qhtcp/Exp4/Z_InteractionTemplate.R

2731 lines
166 KiB
R

#Based on InteractionTemplate.R which is based on Sean Santose's Interaction_V5 script.
#Adapt SS For Structured Data storage but using command line scripts
###Set up the required libraries, call required plot theme elements and set up the command line arguments
library("ggplot2")
library("plyr")
library("extrafont")
library("gridExtra")
library("gplots")
library("RColorBrewer")
library("stringr")
#library("gdata")
library(plotly)
library(htmlwidgets)
Args <- commandArgs(TRUE)
input_file <- Args[1] #"!!Results_17_0827_yor1null-rpl12anull misLabeledAsFrom MI 17_0919_yor1-curated.txt" #Args[1] #Arg 1 #"!!ResultsStd_JS 19_1224_HLEG_P53.txt" is the !!results ... .txt
#Tool to find a file and copy it to desired location
destDir= getwd()
#srcFile= file.choose()
#file.copy(srcFile, destDir)
#input_file= tail(strsplit(srcFile,"[/]")[[1]],1)
#Path to Output Directory
#W=getwd() #R is F'd up, Can't use, Any legitamate platform could build out dirs from this
outDir <- "ZScores/" #"Args[2] #paste0(W,"/ZScores/")
subDir <- outDir #Args[2]
if (file.exists(subDir)){
outputpath <- subDir
} else {
dir.create(file.path(subDir))
}
if (file.exists(paste(subDir,"QC/",sep=""))){
outputpath_QC <- paste(subDir,"QC/",sep="")
} else {
dir.create(file.path(paste(subDir,"QC/",sep="")))
outputpath_QC <- paste(subDir,"QC/",sep="")
}
#define the output path (formerly the second argument from Rscript)
outputpath <- outDir
#Set Args[2] the Background contamination noise filter as a function of standard deviation
#std= as.numeric(Args[2])
#Sean recommends 3 or 5 SD factor.
#Capture Exp_ number,use it to Save Args[2]{std}to Labels field and then Write to Labels to studyInfo.txt for future reference
Labels <- read.csv(file= "../Code/StudyInfo.csv",stringsAsFactors = FALSE) #,sep= ",")
print("Be sure to enter Background noise filter standard deviation i.e., 3 or 5 per Sean")
#User prompt for std multiplier Value
cat("Enter a Standard Deviation value to noise filter \n")
inpChar<- readLines(file("stdin"), n = 1L)
cat(paste("Standard Deviation Value is", inpChar, "\n"))
inpNum= as.numeric(inpChar)
#set std deviation multiplier default if no user entry
if(!is.na(inpNum)){
std= inpNum
}else{std= 3}
expNumber<- as.numeric(sub("^.*?(\\d+)$", "\\1", getwd()))
Labels[expNumber,3]= as.numeric(std)
Delta_Background_sdFactor <- std
DelBGFactr <- as.numeric(Delta_Background_sdFactor)
#Write Background SD value to studyInfo.txt file
#write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE)
write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE)
print('ln 50 write StudyInfo.csv ')
#write.table(Labels,file=paste(outputpath,"StudyInfo.txt"),sep = "\t",row.names = FALSE)
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#++++++BEGIN USER DATA SELECTION SECTION+++++++++++++++++++++++++++++++++++++++++++++++++
#read in the data
X <- read.delim(input_file,skip=2,as.is=T,row.names=1,strip.white=TRUE)
X <- X[!(X[[1]]%in%c("","Scan")),]
#X <- X[!(X[[1]]%in%c(61:76)),] #Remove dAmp plates which are Scans 61 thru 76
#X <- X[which(X$Specifics == "WT"),]
#X_length <- length(X[1,])
#X_end <- length(X[1,]) - 2
#X <- X[,c(1:42,X_end:X_length)]
#use numeric data to perform operations
X$Col <- as.numeric(X$Col)
X$Row <- as.numeric(X$Row)
X$l <- as.numeric(X$l)
X$K <- as.numeric(X$K)
X$r <- as.numeric(X$r)
X$Scan <- as.numeric(X$Scan)
X$AUC <- as.numeric(X$AUC)
X$LstBackgrd <- as.numeric(X$LstBackgrd)
X$X1stBackgrd <- as.numeric(X$X1stBackgrd)
#Sometimes the an experimenter may have placed the non-varying drug in the 'Drug' col instead of the 'Modifier1' col
#as was the case in Gemcitabin and Cytarabin experiments.
#The following allows user to rename columns so as to get the appropriate
#data where it needs to be for the script to run properly.
#colnames(X)[7] <- "Modifier1"
#colnames(X)[8] <- "Conc1"
#colnames(X)[10] <- "Drug"
#colnames(X)[11] <- "Conc"
#set the OrfRep to YDL227C for the ref data
X[X$ORF == "YDL227C",]$OrfRep <- "YDL227C"
#Sean removes the Doxycyclin at 0.0ug.mL so that only the Oligomycin series with Doxycyclin of 0.12ug/mL are used.
#That is the first DM plates are removed from the data set with the following.
#X <- X[X$Conc1 != "0ug/ml",] #This removes data with dox ==0 leaving gene expression on with four different concentrations of Gemcytabin
X <- X[X$Drug != "BMH21",] #This removes data concerning BMH21 for this experiment
#Mert placed the"bad_spot" text in the ORF col. for particular spots in the RF1 and RF2 plates.
#This code removes those spots from the data set used for the interaction analysis. Dr.Hartman feels that these donot effect Zscores significantly and so "non-currated" files were used.
#try(X <- X[X$ORF != "bad_spot",])
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++=
#get total number of drug concentrations
Total_Conc_Nums <- length(unique(X$Conc))
#function to ID numbers in string with characters+numbers (ie to get numeric drug conc)
numextract <- function(string){
str_extract(string, "\\-*\\d+\\.*\\d*")
}
#generate a new column with the numeric drug concs
X$Conc_Num <- as.numeric(numextract(X$Conc))
#Generate new column with the numeric drug concs as factors starting at 0 for the graphing later
X$Conc_Num_Factor <- as.numeric(as.factor(X$Conc_Num)) - 1
#Get the max factor for concentration
MAX_CONC <- max(X$Conc_Num_Factor)
#if treating numbers not as factors uncomment next line and comment out previous line
#MAX_CONC <- max(X$Conc_Num)
#remove wells with problems for making graphs and to not include in summary statistics
X <- X[X$Gene != "BLANK",]
X <- X[X$Gene != "Blank",]
X <- X[X$ORF != "Blank",]
X <- X[X$Gene != "blank",]
#X <- X[X$Gene != "HO",]
Xbu= X
#Inserted to use SGDgenelist to update orfs and replace empty geneName cells with ORF name (adapted from Sean's Merge script). This is to 'fix' the naming for everything that follows (REMc, Heatmaps ... et.al) rather than do it piece meal later
#Sean's Match Script( which was adapted here) was fixed 2022_0608 so as not to write over the RF1&RF2 geneNames which caused a variance with his code results
#in the Z_lm_L,K,r&AUC output values. Values correlated well but were off by a multiplier factor.
SGDgeneList= "../Code/SGD_features.tab"
genes = data.frame(read.delim(file=SGDgeneList,quote="",header=FALSE,colClasses = c(rep("NULL",3), rep("character", 2), rep("NULL", 11))))
for(i in 1:length(X[,14])){
ii= as.numeric(i)
line_num = match(X[ii,14],genes[,1],nomatch=1)
OrfRepColNum= as.numeric(match('OrfRep',names(X)))
if(X[ii,OrfRepColNum]!= "YDL227C"){
X[ii,15] = genes[line_num,2]
}
if((X[ii,15] == "")||(X[ii,15] == "OCT1")){
X[ii,15] = X[ii,OrfRepColNum]
}
}
Xblankreplace= X
#X= Xbu #for restore testing restore X if geneName 'Match' routine needs changing
#Remove dAmPs *******************************
#jlh confirmed to leave dAmps in so comment out this section
#DAmPs_List <- "../Code/22_0602_Remy_DAmPsList.txt"
#Damps <- read.delim(DAmPs_List,header=F)
#X <- X[!(X$ORF %in% Damps$V1),] #fix this to Damps[,1]
#XafterDampsRM=X #Backup for debugging especially when Rstudio goes crazy out of control
# ***********
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#++++++END USER DATA SELECTION+++++++++++++++++++++++++++++++++++++++++++++++++
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
print("ln137 End of User Section including blank gene writeOver")
#++++Begin Graphics Boiler Plate Section+++++++++++++++++++++++++++++++++++++++
#theme elements for plots
theme_Publication <- function(base_size=14, base_family="sans") {
library(grid)
library(ggthemes)
(theme_foundation(base_size=base_size, base_family=base_family)
+ theme(plot.title = element_text(face = "bold",
size = rel(1.2), hjust = 0.5),
text = element_text(),
panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold",size = rel(1)),
axis.title.y = element_text(angle=90,vjust =2),
axis.title.x = element_text(vjust = -0.2),
axis.text = element_text(),
axis.line = element_line(colour="black"),
axis.ticks = element_line(),
panel.grid.major = element_line(colour="#f0f0f0"),
panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA),
legend.position = "bottom",
legend.direction = "horizontal",
legend.key.size= unit(0.2, "cm"),
legend.spacing = unit(0, "cm"),
legend.title = element_text(face="italic"),
plot.margin=unit(c(10,5,5,5),"mm"),
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
strip.text = element_text(face="bold")
))
}
scale_fill_Publication <- function(...){
library(scales)
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
scale_colour_Publication <- function(...){
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
(theme_foundation(base_size=base_size, base_family=base_family)
+ theme(plot.title = element_text(face = "bold",
size = rel(1.2), hjust = 0.5),
text = element_text(),
panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold",size = rel(1)),
axis.title.y = element_text(angle=90,vjust =2),
axis.title.x = element_text(vjust = -0.2),
axis.text = element_text(),
axis.line = element_line(colour="black"),
axis.ticks = element_line(),
panel.grid.major = element_line(colour="#f0f0f0"),
panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA),
legend.position = "right",
legend.direction = "vertical",
legend.key.size= unit(0.5, "cm"),
legend.spacing = unit(0, "cm"),
legend.title = element_text(face="italic"),
plot.margin=unit(c(10,5,5,5),"mm"),
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
strip.text = element_text(face="bold")
))
}
scale_fill_Publication <- function(...){
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
scale_colour_Publication <- function(...){
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
#print timestamp for initial time the code starts
timestamp()
#+++++BEGIN QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++
###Part 2 - Quality control
#print quality control graphs for each dataset before removing data due to contamination
#and before adjusting missing data to max theoretical values
#plate analysis plot
#plate analysis is a quality check to identify plate effects containing anomalies
Plate_Analysis_L <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication()
Plate_Analysis_K <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication()
Plate_Analysis_r <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_Publication()
Plate_Analysis_AUC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication()
Plate_Analysis_L_Box <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication()
Plate_Analysis_K_Box <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication()
Plate_Analysis_r_Box <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_Publication()
Plate_Analysis_AUC_Box <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication()
#quality control - values with a high delta background likely have heavy contamination
#check the frequency of these values
#report the L and K values of these spots
#report the number to be removed based on the Delta_Background_Tolerance
X$Delta_Backgrd <- X$LstBackgrd - X$X1stBackgrd
#raw l vs K before QC
Raw_l_vs_K_beforeQC <- ggplot(X,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) +
ggtitle("Raw L vs K before QC") +
theme_Publication_legend_right()
pdf(paste(outputpath_QC,"Raw_L_vs_K_beforeQC.pdf",sep=""),width = 12,height = 8)
Raw_l_vs_K_beforeQC
dev.off()
pgg <- ggplotly(Raw_l_vs_K_beforeQC)
#pgg
plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_beforeQC.html",sep="")
saveWidget(pgg, file=plotly_path, selfcontained =TRUE)
#set delta background tolerance based on 3 sds from the mean delta background
Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(DelBGFactr*sd(X$Delta_Backgrd))
#Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(3*sd(X$Delta_Backgrd))
print(paste("Delta_Background_Tolerance is",Delta_Background_Tolerance,sep=" "))
Plate_Analysis_Delta_Backgrd <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2,position="jitter") +
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication()
Plate_Analysis_Delta_Backgrd_Box <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication()
X_Delta_Backgrd_above_Tolerance <- X[X$Delta_Backgrd >= Delta_Background_Tolerance,]
X_Delta_Backgrd_above_Tolerance_K_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$K,na.rm = TRUE))/2
X_Delta_Backgrd_above_Tolerance_L_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$l,na.rm = TRUE))/2
X_Delta_Backgrd_above_Tolerance_toRemove <- dim(X_Delta_Backgrd_above_Tolerance)[1]
X_Delta_Backgrd_above_Tolerance_L_vs_K <- ggplot(X_Delta_Backgrd_above_Tolerance,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) +
ggtitle(paste("Raw L vs K for strains above delta background threshold of",Delta_Background_Tolerance,"or above")) +
annotate("text",x=X_Delta_Backgrd_above_Tolerance_L_halfmedian,y=X_Delta_Backgrd_above_Tolerance_K_halfmedian,
label = paste("Strains above delta background tolerance = ",X_Delta_Backgrd_above_Tolerance_toRemove)) +
theme_Publication_legend_right()
pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.pdf",sep=""),width = 12,height = 8)
X_Delta_Backgrd_above_Tolerance_L_vs_K
dev.off()
pgg <- ggplotly(X_Delta_Backgrd_above_Tolerance_L_vs_K)
#pgg
plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.html",sep="")
saveWidget(pgg, file=plotly_path, selfcontained =TRUE)
#frequency plot for all data vs. the delta_background
DeltaBackground_Frequency_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_density() +
ggtitle("Density plot for Delta Background by Conc All Data") + theme_Publication_legend_right()
#bar plot for all data vs. the delta_background
DeltaBackground_Bar_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_bar() +
ggtitle("Bar plot for Delta Background by Conc All Data") + theme_Publication_legend_right()
pdf(file = paste(outputpath_QC,"Frequency_Delta_Background.pdf",sep=""),width = 12,height = 8)
print(DeltaBackground_Frequency_Plot)
print(DeltaBackground_Bar_Plot)
dev.off()
#Need to identify missing data, and differentiate between this data and removed data so the removed data can get set to NA and the missing data can get set to max theoretical values
#1 for missing data, 0 for non missing data
#Use "NG" for NoGrowth rather than "missing"
X$NG <- 0
try(X[X$l == 0 & !is.na(X$l),]$NG <- 1)
#1 for removed data, 0 non removed data
#Use DB to identify number of genes removed due to the DeltaBackground Threshold rather than "Removed"
X$DB <- 0
try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$DB <- 1)
#replace the CPPs for l, r, AUC and K (must be last!) for removed data
try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$l <- NA)
try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$r <- NA)
try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$AUC <- NA)
try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$K <- NA)
Plate_Analysis_L_afterQC <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication()
Plate_Analysis_K_afterQC <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication()
Plate_Analysis_r_afterQC <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication()
Plate_Analysis_AUC_afterQC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication()
Plate_Analysis_Delta_Backgrd_afterQC <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication()
Plate_Analysis_L_Box_afterQC <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication()
Plate_Analysis_K_Box_afterQC <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication()
Plate_Analysis_r_Box_afterQC <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication()
Plate_Analysis_AUC_Box_afterQC <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication()
Plate_Analysis_Delta_Backgrd_Box_afterQC <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication()
#print the plate analysis data before and after QC
pdf(file=paste(outputpath_QC,"Plate_Analysis.pdf",sep=""),width = 14,height=9)
Plate_Analysis_L
Plate_Analysis_L_afterQC
Plate_Analysis_K
Plate_Analysis_K_afterQC
Plate_Analysis_r
Plate_Analysis_r_afterQC
Plate_Analysis_AUC
Plate_Analysis_AUC_afterQC
Plate_Analysis_Delta_Backgrd
Plate_Analysis_Delta_Backgrd_afterQC
dev.off()
#print the plate analysis data before and after QC
pdf(file=paste(outputpath_QC,"Plate_Analysis_Boxplots.pdf",sep=""),width = 18,height=9)
Plate_Analysis_L_Box
Plate_Analysis_L_Box_afterQC
Plate_Analysis_K_Box
Plate_Analysis_K_Box_afterQC
Plate_Analysis_r_Box
Plate_Analysis_r_Box_afterQC
Plate_Analysis_AUC_Box
Plate_Analysis_AUC_Box_afterQC
Plate_Analysis_Delta_Backgrd_Box
Plate_Analysis_Delta_Backgrd_Box_afterQC
dev.off()
#remove the zero values and print plate analysis
X_noZero <- X[which(X$l > 0),]
Plate_Analysis_L_afterQC_Z <- ggplot(X_noZero,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication()
Plate_Analysis_K_afterQC_Z <- ggplot(X_noZero,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication()
Plate_Analysis_r_afterQC_Z <- ggplot(X_noZero,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication()
Plate_Analysis_AUC_afterQC_Z <- ggplot(X_noZero,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication()
Plate_Analysis_Delta_Backgrd_afterQC_Z <- ggplot(X_noZero,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication()
Plate_Analysis_L_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication()
Plate_Analysis_K_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication()
Plate_Analysis_r_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication()
Plate_Analysis_AUC_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication()
Plate_Analysis_Delta_Backgrd_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication()
#print the plate analysis data before and after QC
pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros.pdf",sep=""),width = 14,height=9)
Plate_Analysis_L_afterQC_Z
Plate_Analysis_K_afterQC_Z
Plate_Analysis_r_afterQC_Z
Plate_Analysis_AUC_afterQC_Z
Plate_Analysis_Delta_Backgrd_afterQC_Z
dev.off()
#print the plate analysis data before and after QC
pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros_Boxplots.pdf",sep=""),width = 18,height=9)
Plate_Analysis_L_Box_afterQC_Z
Plate_Analysis_K_Box_afterQC_Z
Plate_Analysis_r_Box_afterQC_Z
Plate_Analysis_AUC_Box_afterQC_Z
Plate_Analysis_Delta_Backgrd_Box_afterQC_Z
dev.off()
#remove dataset with zeros removed
rm(X_noZero)
#X_test_missing_and_removed <- X[X$Removed == 1,]
#calculate summary statistics for all strains, including both background and the deletions
X_stats_ALL <- ddply(X, c("Conc_Num","Conc_Num_Factor"), summarise,
N = (length(l)),
mean_L = mean(l,na.rm=TRUE),
median_L = median(l,na.rm=TRUE),
max_L = max(l,na.rm=TRUE),
min_L = min(l,na.rm=TRUE),
sd_L = sd(l,na.rm=TRUE),
se_L = sd_L / sqrt(N-1),
mean_K = mean(K,na.rm=TRUE),
median_K = median(K,na.rm=TRUE),
max_K = max(K,na.rm=TRUE),
min_K = min(K,na.rm=TRUE),
sd_K = sd(K,na.rm=TRUE),
se_K = sd_K / sqrt(N-1),
mean_r = mean(r,na.rm=TRUE),
median_r = median(r,na.rm=TRUE),
max_r = max(r,na.rm=TRUE),
min_r = min(r,na.rm=TRUE),
sd_r = sd(r,na.rm=TRUE),
se_r = sd_r / sqrt(N-1),
mean_AUC = mean(AUC,na.rm=TRUE),
median_AUC = median(AUC,na.rm=TRUE),
max_AUC = max(AUC,na.rm=TRUE),
min_AUC = min(AUC,na.rm=TRUE),
sd_AUC = sd(AUC,na.rm=TRUE),
se_AUC = sd_AUC / sqrt(N-1)
)
#print(X_stats_ALL_L)
write.csv(X_stats_ALL,file=paste(outputpath,"SummaryStats_ALLSTRAINS.csv"),row.names = FALSE)
#+++++END QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++
##### Part 3 - Generate summary statistics and calculate the max theoretical L value
##### Calculate the Z score at each drug conc for each deletion strain
#get the background strains - can be modified to take another argument but for most screens will just be YDL227C
Background_Strains <- c("YDL227C")
#first part of loop will go through for each background strain
#most cases there will only be one YDL227C
for(s in Background_Strains){
X_Background <- X[X$OrfRep == s,]
#if there's missing data for the background strains set these values to NA so the 0 values aren't included in summary statistics
#we may want to consider in some cases giving the max high value to L depending on the data type
if(table(X_Background$l)[1] == 0){
X_Background[X_Background$l == 0,]$l <- NA
X_Background[X_Background$K == 0,]$K <- NA
X_Background[X_Background$r == 0,]$r <- NA
X_Background[X_Background$AUC == 0,]$AUC <- NA
}
X_Background <- X_Background[!is.na(X_Background$l),]
#get summary stats for L, K, R, AUC
X_stats_BY_L <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise,
N = (length(l)),
mean = mean(l,na.rm=TRUE),
median = median(l,na.rm=TRUE),
max = max(l,na.rm=TRUE),
min = min(l,na.rm=TRUE),
sd = sd(l,na.rm=TRUE),
se = sd / sqrt(N-1)
)
print(X_stats_BY_L)
X1_SD <- max(X_stats_BY_L$sd)
X_stats_BY_K <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise,
N = (length(K)),
mean = mean(K,na.rm=TRUE),
median = median(K,na.rm=TRUE),
max = max(K,na.rm=TRUE),
min = min(K,na.rm=TRUE),
sd = sd(K,na.rm=TRUE),
se = sd / sqrt(N-1)
)
X1_SD_K <- max(X_stats_BY_K$sd)
X_stats_BY_r <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise,
N = length(r),
mean = mean(r,na.rm=TRUE),
median = median(r,na.rm=TRUE),
max = max(r,na.rm=TRUE),
min = min(r,na.rm=TRUE),
sd = sd(r,na.rm=TRUE),
se = sd / sqrt(N-1)
)
X1_SD_r <- max(X_stats_BY_r$sd)
X_stats_BY_AUC <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise,
N = length(AUC),
mean = mean(AUC,na.rm=TRUE),
median = median(AUC,na.rm=TRUE),
max = max(AUC,na.rm=TRUE),
min = min(AUC,na.rm=TRUE),
sd = sd(AUC,na.rm=TRUE),
se = sd / sqrt(N-1)
)
X1_SD_AUC <- max(X_stats_BY_AUC$sd)
X_stats_BY <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise,
N = (length(l)),
mean_L = mean(l,na.rm=TRUE),
median_L = median(l,na.rm=TRUE),
max_L = max(l,na.rm=TRUE),
min_L = min(l,na.rm=TRUE),
sd_L = sd(l,na.rm=TRUE),
se_L = sd_L / sqrt(N-1),
mean_K = mean(K,na.rm=TRUE),
median_K = median(K,na.rm=TRUE),
max_K = max(K,na.rm=TRUE),
min_K = min(K,na.rm=TRUE),
sd_K = sd(K,na.rm=TRUE),
se_K = sd_K / sqrt(N-1),
mean_r = mean(r,na.rm=TRUE),
median_r = median(r,na.rm=TRUE),
max_r = max(r,na.rm=TRUE),
min_r = min(r,na.rm=TRUE),
sd_r = sd(r,na.rm=TRUE),
se_r = sd_r / sqrt(N-1),
mean_AUC = mean(AUC,na.rm=TRUE),
median_AUC = median(AUC,na.rm=TRUE),
max_AUC = max(AUC,na.rm=TRUE),
min_L = min(AUC,na.rm=TRUE),
sd_AUC = sd(AUC,na.rm=TRUE),
se_AUC = sd_AUC / sqrt(N-1)
)
write.csv(X_stats_BY,file=paste(outputpath,"SummaryStats_BackgroundStrains.csv"),row.names=FALSE)
#calculate the max theoretical L values
#only look for max values when K is within 2SD of the ref strain
for(q in unique(X$Conc_Num_Factor)){
if(q == 0){
X_within_2SD_K <- X[X$Conc_Num_Factor == q,]
X_within_2SD_K <- X_within_2SD_K[!is.na(X_within_2SD_K$l),]
X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,]
X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) ,]
X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,]
X_outside_2SD_K <- X[X$Conc_Num_Factor == q,]
X_outside_2SD_K <- X_outside_2SD_K[!is.na(X_outside_2SD_K$l),]
#X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,]
X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,]
#X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,]
}
if(q > 0){
X_within_2SD_K_temp <- X[X$Conc_Num_Factor == q,]
X_within_2SD_K_temp <- X_within_2SD_K_temp[!is.na(X_within_2SD_K_temp$l),]
X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,]
X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])),]
X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])),]
X_within_2SD_K <- rbind(X_within_2SD_K,X_within_2SD_K_temp)
X_outside_2SD_K_temp <- X[X$Conc_Num_Factor == q,]
X_outside_2SD_K_temp <- X_outside_2SD_K_temp[!is.na(X_outside_2SD_K_temp$l),]
#X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,]
X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,]
#X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,]
X_outside_2SD_K <- rbind(X_outside_2SD_K,X_outside_2SD_K_temp)
}
}
X_stats_BY_L_within_2SD_K <- ddply(X_within_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise,
N = (length(l)),
mean = mean(l),
median = median(l),
max = max(l,na.rm=TRUE),
min = min(l,na.rm=TRUE),
sd = sd(l),
se = sd / sqrt(N-1),
z_max = (max-mean)/sd
)
print(X_stats_BY_L_within_2SD_K)
X1_SD_within_2SD_K <- max(X_stats_BY_L_within_2SD_K$sd)
write.csv(X_stats_BY_L_within_2SD_K,file=paste(outputpath_QC,"Max_Observed_L_Vals_for_spots_within_2SD_K.csv",sep=""),row.names=FALSE)
X_stats_BY_L_outside_2SD_K <- ddply(X_outside_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise,
N = (length(l)),
mean = mean(l),
median = median(l),
max = max(l,na.rm=TRUE),
min = min(l,na.rm=TRUE),
sd = sd(l),
se = sd / sqrt(N-1)
)
print(X_stats_BY_L_outside_2SD_K)
X1_SD_outside_2SD_K <- max(X_stats_BY_L_outside_2SD_K$sd)
#X1_SD_outside_2SD_K <- X[X$l %in% X1_SD_within_2SD_K$l,]
Outside_2SD_K_L_vs_K <- ggplot(X_outside_2SD_K,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) +
ggtitle("Raw L vs K for strains falling outside 2SD of the K mean at each conc") + theme_Publication_legend_right()
pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8)
print(Outside_2SD_K_L_vs_K)
dev.off()
pgg <- ggplotly(Outside_2SD_K_L_vs_K)
plotly_path <- paste(getwd(),"/",outputpath_QC,"RawL_vs_K_for_strains_outside_2SD_K.html",sep="")
saveWidget(pgg, file=plotly_path, selfcontained =TRUE)
Outside_2SD_K_delta_background_vs_K <- ggplot(X_outside_2SD_K,aes(Delta_Backgrd,K,color=as.factor(Conc_Num))) + geom_point(aes(l=l,ORF=ORF,Gene=Gene),shape=3,position="jitter") +
ggtitle("DeltaBackground vs K for strains falling outside 2SD of the K mean at each conc") + theme_Publication_legend_right()
pdf(paste(outputpath_QC,"DeltaBackground_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8)
Outside_2SD_K_delta_background_vs_K
dev.off()
pgg <- ggplotly(Outside_2SD_K_delta_background_vs_K)
#pgg
plotly_path <- paste(getwd(),"/",outputpath_QC,"DeltaBackground_vs_K_for_strains_outside_2SD_K.html",sep="")
saveWidget(pgg, file=plotly_path, selfcontained =TRUE)
#get the background strain mean values at the no drug conc to calculate shift
Background_L <- X_stats_BY_L$mean[1]
Background_K <- X_stats_BY_K$mean[1]
Background_r <- X_stats_BY_r$mean[1]
Background_AUC <- X_stats_BY_AUC$mean[1]
#create empty plots for plotting element
p_l <- ggplot()
p_K <- ggplot()
p_r <- ggplot()
p_AUC <- ggplot()
p_rf_l <- ggplot()
p_rf_K <- ggplot()
p_rf_r <- ggplot()
p_rf_AUC <- ggplot()
#get only the deletion strains
X2 <- X
X2 <- X2[X2$OrfRep != "YDL227C",]
#if set to max theoretical value, add a 1 to SM, if not, leave as 0
#SM = Set to Max
X2$SM <- 0
#set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min
for(i in 1:length(unique(X2$Conc_Num))){
Concentration <- unique(X2$Conc_Num)[i]
X2_temp <- X2[X2$Conc_Num == Concentration,]
if(Concentration == 0){
X2_new <- X2_temp
print(paste("Check loop order, conc =",Concentration,sep=" "))
}
if(Concentration > 0){
try(X2_temp[X2_temp$l == 0 & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i])
try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$SM <- 1)
try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i])
#X2_temp[X2_temp$K == 0,]$K <- X_stats_ALL_K$max[i]
#X2_temp[X2_temp$r == 0,]$r <- X_stats_ALL_r$max[i]
#X2_temp[X2_temp$AUC == 0,]$AUC <- X_stats_ALL_AUC$max[i]
print(paste("Check loop order, conc =",Concentration,sep=" "))
X2_new <- rbind(X2_new,X2_temp)
}
}
X2 <- X2_new
#get only the RF strains
X2_RF <- X
X2_RF <- X2_RF[X2_RF$OrfRep == "YDL227C",]
#if set to max theoretical value, add a 1 to SM, if not, leave as 0
#SM = Set to Max
X2_RF$SM <- 0
#set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min
for(i in 1:length(unique(X2_RF$Conc_Num))){
Concentration <- unique(X2_RF$Conc_Num)[i]
X2_RF_temp <- X2_RF[X2_RF$Conc_Num == Concentration,]
if(Concentration == 0){
X2_RF_new <- X2_RF_temp
print(paste("Check loop order, conc =",Concentration,sep=" "))
}
if(Concentration > 0){
try(X2_RF_temp[X2_RF_temp$l == 0 & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i])
try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$SM <- 1)
try(X2_RF_temp[X2_RF_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i])
print(paste("Check loop order, if error, refs have no L values outside theoretical max L, for REFs, conc =",Concentration,sep=" "))
X2_RF_new <- rbind(X2_RF_new,X2_RF_temp)
}
}
X2_RF <- X2_RF_new
#######Part 4 Get the RF Z score values
#Change the OrfRep Column to include the RF strain, the Gene name and the Num. so each RF gets its own score
X2_RF$OrfRep <- paste(X2_RF$OrfRep,X2_RF$Gene,X2_RF$Num.,sep="_")
num_genes_RF <- length(unique(X2_RF$OrfRep))
print(num_genes_RF)
#create the output data.frame containing columns for each RF strain
InteractionScores_RF <- unique(X2_RF["OrfRep"])
#InteractionScores_RF$Gene <- unique(X2$Gene)
InteractionScores_RF$Gene <- NA
InteractionScores_RF$Raw_Shift_L <- NA
InteractionScores_RF$Z_Shift_L <- NA
InteractionScores_RF$lm_Score_L <- NA
InteractionScores_RF$Z_lm_L <- NA
InteractionScores_RF$R_Squared_L <- NA
InteractionScores_RF$Sum_Z_Score_L <- NA
InteractionScores_RF$Avg_Zscore_L <- NA
InteractionScores_RF$Raw_Shift_K <- NA
InteractionScores_RF$Z_Shift_K <- NA
InteractionScores_RF$lm_Score_K <- NA
InteractionScores_RF$Z_lm_K <- NA
InteractionScores_RF$R_Squared_K <- NA
InteractionScores_RF$Sum_Z_Score_K <- NA
InteractionScores_RF$Avg_Zscore_K <- NA
InteractionScores_RF$Raw_Shift_r <- NA
InteractionScores_RF$Z_Shift_r <- NA
InteractionScores_RF$lm_Score_r <- NA
InteractionScores_RF$Z_lm_r <- NA
InteractionScores_RF$R_Squared_r <- NA
InteractionScores_RF$Sum_Z_Score_r <- NA
InteractionScores_RF$Avg_Zscore_r <- NA
InteractionScores_RF$Raw_Shift_AUC <- NA
InteractionScores_RF$Z_Shift_AUC <- NA
InteractionScores_RF$lm_Score_AUC <- NA
InteractionScores_RF$Z_lm_AUC <- NA
InteractionScores_RF$R_Squared_AUC <- NA
InteractionScores_RF$Sum_Z_Score_AUC <- NA
InteractionScores_RF$Avg_Zscore_AUC <- NA
InteractionScores_RF$NG <- NA
InteractionScores_RF$SM <- NA
for(i in 1:num_genes_RF){
#get each deletion strain ORF
Gene_Sel <- unique(X2_RF$OrfRep)[i]
#extract only the current deletion strain and its data
X_Gene_Sel <- X2_RF[X2_RF$OrfRep == Gene_Sel,]
X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise,
N = (length(l)),
mean_L = mean(l,na.rm = TRUE),
median_L = median(l,na.rm = TRUE),
sd_L = sd(l,na.rm = TRUE),
se_L = sd_L / sqrt(N-1),
mean_K = mean(K,na.rm = TRUE),
median_K = median(K,na.rm = TRUE),
sd_K = sd(K,na.rm = TRUE),
se_K = sd_K / sqrt(N-1),
mean_r = mean(r,na.rm = TRUE),
median_r = median(r,na.rm = TRUE),
sd_r = sd(r,na.rm = TRUE),
se_r = sd_r / sqrt(N-1),
mean_AUC = mean(AUC,na.rm = TRUE),
median_AUC = median(AUC,na.rm = TRUE),
sd_AUC = sd(AUC,na.rm = TRUE),
se_AUC = sd_AUC / sqrt(N-1),
NG = sum(NG,na.rm=TRUE),
DB= sum(DB,na.rm=TRUE),
SM = sum(SM,na.rm=TRUE)
)
#Get shift vals
#if L is 0, that means the no growth on no drug
#if L is NA at 0, that means the spot was removed due to contamination
#if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift
#otherwise calculate shift at no drug conc
if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){
X_stats_interaction$Raw_Shift_L <- 0
X_stats_interaction$Raw_Shift_K <- 0
X_stats_interaction$Raw_Shift_r <- 0
X_stats_interaction$Raw_Shift_AUC <- 0
X_stats_interaction$Z_Shift_L <- 0
X_stats_interaction$Z_Shift_K <- 0
X_stats_interaction$Z_Shift_r <- 0
X_stats_interaction$Z_Shift_AUC <- 0
}else{
X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L
X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K
X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r
X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC
X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1]
X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1]
X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1]
X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1]
}
#get WT vals
X_stats_interaction$WT_l <- X_stats_BY_L$mean
X_stats_interaction$WT_K <- X_stats_BY_K$mean
X_stats_interaction$WT_r <- X_stats_BY_r$mean
X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean
#Get WT SD
X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd
X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd
X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd
X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd
#only get scores if there's growth at no drug
if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){
#calculate expected values
X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L
X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K
X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r
X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC
#calculate normalized delta values
X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L
X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K
X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r
X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC
#disregard shift for no growth values in Z score calculation
if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC
}
#disregard shift for set to max values in Z score calculation
if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){
X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l
#only calculate the L value without shift since L is the only adjusted value
#X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K
#X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r
#X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC
}
#calculate Z score at each concentration
X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l)
X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K)
X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r)
X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC)
#get linear model
gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction)
gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction)
gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction)
gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction)
#get interaction score calculated by linear model and R-squared value for the fit
gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1]
r_squared_l <- summary(gene_lm_L)$r.squared
gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1]
r_squared_K <- summary(gene_lm_K)$r.squared
gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1]
r_squared_r <- summary(gene_lm_r)$r.squared
gene_interaction_AUC <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_AUC$coefficients[1]
r_squared_AUC <- summary(gene_lm_AUC)$r.squared
#Get total of non removed values
Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1
#report the scores
InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1]
InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1]
InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1]
InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1]
InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L
InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l
InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)
InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc)
InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1]
InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1]
InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K
InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K
InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)
InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc)
InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1]
InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1]
InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r
InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r
InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)
InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1)
InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1]
InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1]
InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC
InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC
InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)
InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1)
}
if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){
#calculate expected values
X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L
X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K
X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r
X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC
#calculate normalized delta values
X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L
X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K
X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r
X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC
#disregard shift for missing values in Z score calculatiom
if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC
}
#disregard shift for set to max values in Z score calculation
if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){
X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l
#only calculate the L value without shift since L is the only adjusted value
#X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K
#X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r
#X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC
}
#calculate Z score at each concentration
X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l)
X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K)
X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r)
X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC)
#NA values for the next part since there's an NA or 0 at the no drug.
gene_lm_L <- NA
gene_lm_K <- NA
gene_lm_r <- NA
gene_lm_AUC <- NA
gene_interaction_L <- NA
r_squared_l <- NA
gene_interaction_K <- NA
r_squared_K <- NA
gene_interaction_r <- NA
r_squared_r <- NA
gene_interaction_AUC <- NA
r_squared_AUC <- NA
X_stats_interaction$Raw_Shift_L <- NA
X_stats_interaction$Raw_Shift_K <- NA
X_stats_interaction$Raw_Shift_r <- NA
X_stats_interaction$Raw_Shift_AUC <- NA
X_stats_interaction$Z_Shift_L <- NA
X_stats_interaction$Z_Shift_K <- NA
X_stats_interaction$Z_Shift_r <- NA
X_stats_interaction$Z_Shift_AUC <- NA
InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1]
InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1]
InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1]
InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1]
InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L
InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l
InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA
InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA
InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1]
InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1]
InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K
InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K
InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA
InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA
InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1]
InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1]
InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r
InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r
InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA
InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA
InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1]
InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1]
InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC
InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC
InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA
InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA
}
if(i == 1){
X_stats_interaction_ALL_RF <- X_stats_interaction
}
if(i > 1){
X_stats_interaction_ALL_RF <- rbind(X_stats_interaction_ALL_RF,X_stats_interaction)
}
InteractionScores_RF$NG[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE)
InteractionScores_RF$DB[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE)
InteractionScores_RF$SM[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE)
#X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int)
}
print("Pass RF Calculation loop")
lm_sd_L <- sd(InteractionScores_RF$lm_Score_L,na.rm=TRUE)
lm_sd_K <- sd(InteractionScores_RF$lm_Score_K,na.rm=TRUE)
lm_sd_r <- sd(InteractionScores_RF$lm_Score_r,na.rm=TRUE)
lm_sd_AUC <- sd(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE)
lm_mean_L <- mean(InteractionScores_RF$lm_Score_L,na.rm=TRUE)
lm_mean_K <- mean(InteractionScores_RF$lm_Score_K,na.rm=TRUE)
lm_mean_r <- mean(InteractionScores_RF$lm_Score_r,na.rm=TRUE)
lm_mean_AUC <- mean(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE)
print(paste("Mean RF linear regression score L",lm_mean_L))
InteractionScores_RF$Z_lm_L <- (InteractionScores_RF$lm_Score_L - lm_mean_L)/(lm_sd_L)
InteractionScores_RF$Z_lm_K <- (InteractionScores_RF$lm_Score_K - lm_mean_K)/(lm_sd_K)
InteractionScores_RF$Z_lm_r <- (InteractionScores_RF$lm_Score_r - lm_mean_r)/(lm_sd_r)
InteractionScores_RF$Z_lm_AUC <- (InteractionScores_RF$lm_Score_AUC - lm_mean_AUC)/(lm_sd_AUC)
InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$Z_lm_L,decreasing=TRUE),]
InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$NG,decreasing=TRUE),]
write.csv(InteractionScores_RF,paste(outputpath,"RF_ZScores_Interaction.csv",sep=""),row.names=FALSE)
for(i in 1:num_genes_RF){
Gene_Sel <- unique(InteractionScores_RF$OrfRep)[i]
X_ZCalculations <- X_stats_interaction_ALL_RF[X_stats_interaction_ALL_RF$OrfRep == Gene_Sel,]
X_Int_Scores <- InteractionScores_RF[InteractionScores_RF$OrfRep == Gene_Sel,]
p_rf_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) +
coord_cartesian(ylim = c(-65,65)) +
geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) +
ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) +
annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) +
#annotate("text",x=1,y=35,label = paste("Avg Zscore =",round(X_Int_Scores$Avg_Zscore_L,2))) +
annotate("text",x=1,y=25,label = paste("lm Zscore =",round(X_Int_Scores$Z_lm_L,2))) +
annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) +
annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) +
annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) +
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) +
scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) +
theme_Publication()
p_rf_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) +
coord_cartesian(ylim = c(-65,65)) +
geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) +
ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) +
annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) +
#annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) +
annotate("text",x=1,y=25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_L,2))) +
annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) +
annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) +
annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) +
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) +
scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) +
theme_Publication()
p_rf_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) +
coord_cartesian(ylim = c(-0.65,0.65)) +
geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) +
ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) +
annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) +
#annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) +
annotate("text",x=1,y=0.25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_r,2))) +
annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) +
annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) +
annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) +
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) +
scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) +
theme_Publication()
p_rf_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) +
coord_cartesian(ylim = c(-6500,6500)) +
geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) +
ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) +
annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) +
#annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) +
annotate("text",x=1,y=2500,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_AUC,2))) +
annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) +
annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) +
annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) +
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) +
scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) +
theme_Publication()
if(i == 1){
X_stats_interaction_ALL_RF_final <- X_ZCalculations
}
if(i > 1){
X_stats_interaction_ALL_RF_final <- rbind(X_stats_interaction_ALL_RF_final,X_ZCalculations)
}
}
print("Pass RF ggplot loop")
write.csv(X_stats_interaction_ALL_RF_final,paste(outputpath,"RF_ZScore_Calculations.csv",sep=""),row.names = FALSE)
####### Part 5 - Get Zscores for Gene deletion strains
#get total number of genes for the next loop
num_genes <- length(unique(X2$OrfRep))
print(num_genes)
#create the output data.frame containing columns for each deletion strain
InteractionScores <- unique(X2["OrfRep"])
#InteractionScores$Gene <- unique(X2$Gene)
InteractionScores$Gene <- NA
InteractionScores$Raw_Shift_L <- NA
InteractionScores$Z_Shift_L <- NA
InteractionScores$lm_Score_L <- NA
InteractionScores$Z_lm_L <- NA
InteractionScores$R_Squared_L <- NA
InteractionScores$Sum_Z_Score_L <- NA
InteractionScores$Avg_Zscore_L <- NA
InteractionScores$Raw_Shift_K <- NA
InteractionScores$Z_Shift_K <- NA
InteractionScores$lm_Score_K <- NA
InteractionScores$Z_lm_K <- NA
InteractionScores$R_Squared_K <- NA
InteractionScores$Sum_Z_Score_K <- NA
InteractionScores$Avg_Zscore_K <- NA
InteractionScores$Raw_Shift_r <- NA
InteractionScores$Z_Shift_r <- NA
InteractionScores$lm_Score_r <- NA
InteractionScores$Z_lm_r <- NA
InteractionScores$R_Squared_r <- NA
InteractionScores$Sum_Z_Score_r <- NA
InteractionScores$Avg_Zscore_r <- NA
InteractionScores$Raw_Shift_AUC <- NA
InteractionScores$Z_Shift_AUC <- NA
InteractionScores$lm_Score_AUC <- NA
InteractionScores$Z_lm_AUC <- NA
InteractionScores$R_Squared_AUC <- NA
InteractionScores$Sum_Z_Score_AUC <- NA
InteractionScores$Avg_Zscore_AUC <- NA
InteractionScores$NG <- NA
InteractionScores$DB <- NA
InteractionScores$SM <- NA
for(i in 1:num_genes){
#get each deletion strain ORF
Gene_Sel <- unique(X2$OrfRep)[i]
#extract only the current deletion strain and its data
X_Gene_Sel <- X2[X2$OrfRep == Gene_Sel,]
X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise,
N = (length(l)),
mean_L = mean(l,na.rm = TRUE),
median_L = median(l,na.rm = TRUE),
sd_L = sd(l,na.rm = TRUE),
se_L = sd_L / sqrt(N-1),
mean_K = mean(K,na.rm = TRUE),
median_K = median(K,na.rm = TRUE),
sd_K = sd(K,na.rm = TRUE),
se_K = sd_K / sqrt(N-1),
mean_r = mean(r,na.rm = TRUE),
median_r = median(r,na.rm = TRUE),
sd_r = sd(r,na.rm = TRUE),
se_r = sd_r / sqrt(N-1),
mean_AUC = mean(AUC,na.rm = TRUE),
median_AUC = median(AUC,na.rm = TRUE),
sd_AUC = sd(AUC,na.rm = TRUE),
se_AUC = sd_AUC / sqrt(N-1),
NG = sum(NG,na.rm=TRUE),
DB= sum(DB,na.rm=TRUE),
SM = sum(SM,na.rm=TRUE)
)
#Get shift vals
#if L is 0, that means the no growth on no drug
#if L is NA at 0, that means the spot was removed due to contamination
#if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift
#otherwise calculate shift at no drug conc
if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){
X_stats_interaction$Raw_Shift_L <- 0
X_stats_interaction$Raw_Shift_K <- 0
X_stats_interaction$Raw_Shift_r <- 0
X_stats_interaction$Raw_Shift_AUC <- 0
X_stats_interaction$Z_Shift_L <- 0
X_stats_interaction$Z_Shift_K <- 0
X_stats_interaction$Z_Shift_r <- 0
X_stats_interaction$Z_Shift_AUC <- 0
}else{
X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L
X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K
X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r
X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC
X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1]
X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1]
X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1]
X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1]
}
#get WT vals
X_stats_interaction$WT_l <- X_stats_BY_L$mean
X_stats_interaction$WT_K <- X_stats_BY_K$mean
X_stats_interaction$WT_r <- X_stats_BY_r$mean
X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean
#Get WT SD
X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd
X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd
X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd
X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd
#only get scores if there's growth at no drug
if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){
#calculate expected values
X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L
X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K
X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r
X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC
#calculate normalized delta values
X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L
X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K
X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r
X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC
#disregard shift for no growth values in Z score calculation
if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC
}
#disregard shift for set to max values in Z score calculation
if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){
X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l
#only calculate the L value without shift since L is the only adjusted value
#X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K
#X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r
#X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC
}
#calculate Z score at each concentration
X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l)
X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K)
X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r)
X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC)
#get linear model
gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction)
gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction)
gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction)
gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction)
#get interaction score calculated by linear model and R-squared value for the fit
gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1]
r_squared_l <- summary(gene_lm_L)$r.squared
gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1]
r_squared_K <- summary(gene_lm_K)$r.squared
gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1]
r_squared_r <- summary(gene_lm_r)$r.squared
gene_interaction_AUC <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_AUC$coefficients[1]
r_squared_AUC <- summary(gene_lm_AUC)$r.squared
#Get total of non removed values
Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1
#report the scores
InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1])
InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1])
InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1]
InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1]
InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_L
InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_L-lm_mean_L)/lm_sd_L
InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l
InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)
InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc)
InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1]
InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1]
InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_K
InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_K-lm_mean_K)/lm_sd_K
InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K
InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)
InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc)
InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1]
InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1]
InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_r
InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_r-lm_mean_r)/lm_sd_r
InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r
InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)
InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1)
InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1]
InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1]
InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_AUC
InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_AUC-lm_mean_AUC)/lm_sd_AUC
InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC
InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)
InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1)
}
if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){
#calculate expected values
X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L
X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K
X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r
X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC
#calculate normalized delta values
X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L
X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K
X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r
X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC
#disregard shift for missing values in Z score calculatiom
if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC
}
#disregard shift for set to max values in Z score calculation
if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){
X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l
#only calculate the L value without shift since L is the only adjusted value
#X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K
#X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r
#X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC
}
#calculate Z score at each concentration
X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l)
X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K)
X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r)
X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC)
#NA values for the next part since there's an NA or 0 at the no drug.
gene_lm_L <- NA
gene_lm_K <- NA
gene_lm_r <- NA
gene_interaction_L <- NA
r_squared_l <- NA
gene_interaction_K <- NA
r_squared_K <- NA
gene_interaction_r <- NA
r_squared_r <- NA
X_stats_interaction$Raw_Shift_L <- NA
X_stats_interaction$Raw_Shift_K <- NA
X_stats_interaction$Raw_Shift_r <- NA
X_stats_interaction$Raw_Shift_AUC <- NA
X_stats_interaction$Z_Shift_L <- NA
X_stats_interaction$Z_Shift_K <- NA
X_stats_interaction$Z_Shift_r <- NA
X_stats_interaction$Z_Shift_AUC <- NA
InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1])
InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1])
InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1]
InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1]
InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l
InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1]
InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1]
InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <-NA
InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K
InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1]
InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1]
InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r
InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1]
InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1]
InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC
InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA
}
if(i == 1){
X_stats_interaction_ALL <- X_stats_interaction
}
if(i > 1){
X_stats_interaction_ALL <- rbind(X_stats_interaction_ALL,X_stats_interaction)
}
InteractionScores$NG[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE)
InteractionScores$DB[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE)
InteractionScores$SM[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE)
#X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int)
}
print("Pass Int Calculation loop")
InteractionScores <- InteractionScores[order(InteractionScores$Z_lm_L,decreasing=TRUE),]
InteractionScores <- InteractionScores[order(InteractionScores$NG,decreasing=TRUE),]
df_order_by_OrfRep <- unique(InteractionScores$OrfRep)
#X_stats_interaction_ALL <- X_stats_interaction_ALL[order(X_stats_interaction_ALL$NG,decreasing=TRUE),]
write.csv(InteractionScores,paste(outputpath,"ZScores_Interaction.csv",sep=""),row.names=FALSE)
InteractionScores_deletion_enhancers_L <- InteractionScores[InteractionScores$Avg_Zscore_L >= 2,]
InteractionScores_deletion_enhancers_K <- InteractionScores[InteractionScores$Avg_Zscore_K <= -2,]
InteractionScores_deletion_suppressors_L <- InteractionScores[InteractionScores$Avg_Zscore_L <= -2,]
InteractionScores_deletion_suppressors_K <- InteractionScores[InteractionScores$Avg_Zscore_K >= 2,]
InteractionScores_deletion_enhancers_and_Suppressors_L <- InteractionScores[InteractionScores$Avg_Zscore_L >= 2 | InteractionScores$Avg_Zscore_L <= -2,]
InteractionScores_deletion_enhancers_and_Suppressors_K <- InteractionScores[InteractionScores$Avg_Zscore_K >= 2 | InteractionScores$Avg_Zscore_K <= -2,]
InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- InteractionScores[InteractionScores$Z_lm_L >= 2 & InteractionScores$Avg_Zscore_L <= -2,]
InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- InteractionScores[InteractionScores$Z_lm_L <= -2 & InteractionScores$Avg_Zscore_L >= 2,]
InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- InteractionScores[InteractionScores$Z_lm_K <= -2 & InteractionScores$Avg_Zscore_K >= 2,]
InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- InteractionScores[InteractionScores$Z_lm_K >= 2 & InteractionScores$Avg_Zscore_K <= -2,]
InteractionScores_deletion_enhancers_L <- InteractionScores_deletion_enhancers_L[!is.na(InteractionScores_deletion_enhancers_L$OrfRep),]
InteractionScores_deletion_enhancers_K <- InteractionScores_deletion_enhancers_K[!is.na(InteractionScores_deletion_enhancers_K$OrfRep),]
InteractionScores_deletion_suppressors_L <- InteractionScores_deletion_suppressors_L[!is.na(InteractionScores_deletion_suppressors_L$OrfRep),]
InteractionScores_deletion_suppressors_K <- InteractionScores_deletion_suppressors_K[!is.na(InteractionScores_deletion_suppressors_K$OrfRep),]
InteractionScores_deletion_enhancers_and_Suppressors_L <- InteractionScores_deletion_enhancers_and_Suppressors_L[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L$OrfRep),]
InteractionScores_deletion_enhancers_and_Suppressors_K <- InteractionScores_deletion_enhancers_and_Suppressors_K[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K$OrfRep),]
InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L[!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L$OrfRep),]
InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L[!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L$OrfRep),]
InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K[!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K$OrfRep),]
InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K[!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K$OrfRep),]
write.csv(InteractionScores_deletion_enhancers_L,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_L.csv",sep=""),row.names=FALSE)
write.csv(InteractionScores_deletion_enhancers_K,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_K.csv",sep=""),row.names=FALSE)
write.csv(InteractionScores_deletion_suppressors_L,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_L.csv",sep=""),row.names=FALSE)
write.csv(InteractionScores_deletion_suppressors_K,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_K.csv",sep=""),row.names=FALSE)
write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv",sep=""),row.names=FALSE)
write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv",sep=""),row.names=FALSE)
write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L,paste(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv",sep=""),row.names=FALSE)
write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L,paste(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv",sep=""),row.names=FALSE)
write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K,paste(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv",sep=""),row.names=FALSE)
write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K,paste(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv",sep=""),row.names=FALSE)
#get enhancers and suppressors for linear regression
InteractionScores_deletion_enhancers_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2,]
InteractionScores_deletion_enhancers_K_lm <- InteractionScores[InteractionScores$Z_lm_K <= -2,]
InteractionScores_deletion_suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L <= -2,]
InteractionScores_deletion_suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2,]
InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2 | InteractionScores$Z_lm_L <= -2,]
InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2 | InteractionScores$Z_lm_K <= -2,]
InteractionScores_deletion_enhancers_L_lm <- InteractionScores_deletion_enhancers_L_lm[!is.na(InteractionScores_deletion_enhancers_L_lm$OrfRep),]
InteractionScores_deletion_enhancers_K_lm <- InteractionScores_deletion_enhancers_K_lm[!is.na(InteractionScores_deletion_enhancers_K_lm$OrfRep),]
InteractionScores_deletion_suppressors_L_lm <- InteractionScores_deletion_suppressors_L_lm[!is.na(InteractionScores_deletion_suppressors_L_lm$OrfRep),]
InteractionScores_deletion_suppressors_K_lm <- InteractionScores_deletion_suppressors_K_lm[!is.na(InteractionScores_deletion_suppressors_K_lm$OrfRep),]
InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores_deletion_enhancers_and_Suppressors_L_lm[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L_lm$OrfRep),]
InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores_deletion_enhancers_and_Suppressors_K_lm[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K_lm$OrfRep),]
write.csv(InteractionScores_deletion_enhancers_L_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_L_lm.csv",sep=""),row.names=FALSE)
write.csv(InteractionScores_deletion_enhancers_K_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_K_lm.csv",sep=""),row.names=FALSE)
write.csv(InteractionScores_deletion_suppressors_L_lm,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_L_lm.csv",sep=""),row.names=FALSE)
write.csv(InteractionScores_deletion_suppressors_K_lm,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_K_lm.csv",sep=""),row.names=FALSE)
write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv",sep=""),row.names=FALSE)
write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv",sep=""),row.names=FALSE)
write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE)
print('ln 1570 write StudyInfo.csv after ')
#write.table(Labels,file=paste("../Code/StudyInfo.txt"),sep="\t",row.names = FALSE)
for(i in 1:num_genes){
Gene_Sel <- unique(InteractionScores$OrfRep)[i]
X_ZCalculations <- X_stats_interaction_ALL[X_stats_interaction_ALL$OrfRep == Gene_Sel,]
X_Int_Scores <- InteractionScores[InteractionScores$OrfRep == Gene_Sel,]
p_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) +
coord_cartesian(ylim = c(-65,65)) +
geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) +
ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) +
annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) +
#annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_L,2))) +
annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_L,2))) +
annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) +
annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) +
annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) +
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) +
scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) +
theme_Publication()
p_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) +
coord_cartesian(ylim = c(-65,65)) +
geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) +
ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) +
annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) +
#annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) +
annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_K,2))) +
annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) +
annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) +
annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) +
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) +
scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) +
theme_Publication()
p_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) +
coord_cartesian(ylim = c(-0.65,0.65)) +
geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) +
ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) +
annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) +
#annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) +
annotate("text",x=1,y=0.25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_r,2))) +
annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) +
annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) +
annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) +
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) +
scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) +
theme_Publication()
p_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) +
coord_cartesian(ylim = c(-6500,6500)) +
geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) +
ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) +
annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) +
#annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) +
annotate("text",x=1,y=2500,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_AUC,2))) +
annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) +
annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) +
annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) +
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) +
scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) +
theme_Publication()
if(i == 1){
X_stats_interaction_ALL_final <- X_ZCalculations
}
if(i > 1){
X_stats_interaction_ALL_final <- rbind(X_stats_interaction_ALL_final,X_ZCalculations)
}
}
print("Pass Int ggplot loop")
write.csv(X_stats_interaction_ALL_final,paste(outputpath,"ZScore_Calculations.csv",sep=""),row.names = FALSE)
Blank <- ggplot(X2_RF) + geom_blank()
pdf(paste(outputpath,"InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE)
X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise,
mean_L = mean(l,na.rm=TRUE),
median_L = median(l,na.rm=TRUE),
max_L = max(l,na.rm=TRUE),
min_L = min(l,na.rm=TRUE),
sd_L = sd(l,na.rm=TRUE),
mean_K = mean(K,na.rm=TRUE),
median_K = median(K,na.rm=TRUE),
max_K = max(K,na.rm=TRUE),
min_K = min(K,na.rm=TRUE),
sd_K = sd(K,na.rm=TRUE),
mean_r = mean(r,na.rm=TRUE),
median_r = median(r,na.rm=TRUE),
max_r = max(r,na.rm=TRUE),
min_r = min(r,na.rm=TRUE),
sd_r = sd(r,na.rm=TRUE),
mean_AUC = mean(AUC,na.rm=TRUE),
median_AUC = median(AUC,na.rm=TRUE),
max_AUC = max(AUC,na.rm=TRUE),
min_AUC = min(AUC,na.rm=TRUE),
sd_AUC = sd(AUC,na.rm=TRUE),
NG = sum(NG,na.rm=TRUE),
DB = sum(DB,na.rm=TRUE),
SM = sum(SM,na.rm=TRUE)
)
L_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,l)) + geom_point(position="jitter",size=1) +
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") +
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) +
annotate("text",x=-0.25,y=10,label="NG") +
annotate("text",x=-0.25,y=5,label="DB") +
annotate("text",x=-0.25,y=0,label="SM") +
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10,label=X_stats_X2_RF$NG) +
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=5,label=X_stats_X2_RF$DB) +
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=0,label=X_stats_X2_RF$SM) +
theme_Publication()
K_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,K)) + geom_point(position="jitter",size=1) +
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") +
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(-20,160)) +
annotate("text",x=-0.25,y=-5,label="NG") +
annotate("text",x=-0.25,y=-12.5,label="DB") +
annotate("text",x=-0.25,y=-20,label="SM") +
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-5,label=X_stats_X2_RF$NG) +
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-12.5,label=X_stats_X2_RF$DB) +
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-20,label=X_stats_X2_RF$SM) +
theme_Publication()
R_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,r)) + geom_point(position="jitter",size=1) +
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") +
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) +
annotate("text",x=-0.25,y=.9,label="NG") +
annotate("text",x=-0.25,y=.8,label="DB") +
annotate("text",x=-0.25,y=.7,label="SM") +
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.9,label=X_stats_X2_RF$NG) +
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.8,label=X_stats_X2_RF$DB) +
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.7,label=X_stats_X2_RF$SM) +
theme_Publication()
AUC_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,AUC)) + geom_point(position="jitter",size=1) +
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") +
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) +
annotate("text",x=-0.25,y=11000,label="NG") +
annotate("text",x=-0.25,y=10000,label="DB") +
annotate("text",x=-0.25,y=9000,label="SM") +
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=11000,label=X_stats_X2_RF$NG) +
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10000,label=X_stats_X2_RF$DB) +
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=9000,label=X_stats_X2_RF$SM) +
theme_Publication()
L_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)) + geom_boxplot() +
scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) +
theme_Publication()
K_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)) + geom_boxplot() +
scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) +
theme_Publication()
r_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)) + geom_boxplot() +
scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) +
theme_Publication()
AUC_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)) + geom_boxplot() +
scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) +
theme_Publication()
grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2)
grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2)
#plot the references
#grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4)
#grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2)
#loop for grid arrange 4x3
j <- rep(1,((num_genes)/3)-1)
for(n in 1:length(j)){
j[n+1] <- n*3 + 1
}
#loop for printing each plot
num <- 0
for(m in 1:(round((num_genes)/3)-1)){
num <- j[m]
grid.arrange(p_l[[num]],p_K[[num]],p_r[[num]],p_AUC[[num]],p_l[[num+1]],p_K[[num+1]],p_r[[num+1]],p_AUC[[num+1]],p_l[[num+2]],p_K[[num+2]],p_r[[num+2]],p_AUC[[num+2]],ncol=4,nrow=3)
#grid.arrange(p_l[[364]],p_K[[364]],p_r[[364]],p_l[[365]],p_K[[365]],p_r[[365]],p_l[[366]],p_K[[366]],p_r[[366]],ncol=3,nrow=3)
#p1[[num+3]],p_K[[num+3]],p_r[[num+3]],p1[[num+4]],p_K[[num+4]],p_r[[num+4]]
}
if(num_genes != (num+2)){
total_num = num_genes - (num+2)
if(total_num == 5){
grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3)
grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_AUC[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],p_AUC[[num+7]],Blank,Blank,Blank,Blank,ncol=4,nrow=3)
}
if(total_num == 4){
grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3)
grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_AUC[[num+6]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3)
}
if(total_num == 3){
grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3)
#grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3)
}
if(total_num == 2){
grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],Blank,Blank,Blank,Blank,ncol=4,nrow=3)
#grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3)
}
if(total_num == 1){
grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3)
#grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3)
}
}
dev.off()
pdf(paste(outputpath,"RF_InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE)
X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise,
mean_L = mean(l,na.rm=TRUE),
median_L = median(l,na.rm=TRUE),
max_L = max(l,na.rm=TRUE),
min_L = min(l,na.rm=TRUE),
sd_L = sd(l,na.rm=TRUE),
mean_K = mean(K,na.rm=TRUE),
median_K = median(K,na.rm=TRUE),
max_K = max(K,na.rm=TRUE),
min_K = min(K,na.rm=TRUE),
sd_K = sd(K,na.rm=TRUE),
mean_r = mean(r,na.rm=TRUE),
median_r = median(r,na.rm=TRUE),
max_r = max(r,na.rm=TRUE),
min_r = min(r,na.rm=TRUE),
sd_r = sd(r,na.rm=TRUE),
mean_AUC = mean(AUC,na.rm=TRUE),
median_AUC = median(AUC,na.rm=TRUE),
max_AUC = max(AUC,na.rm=TRUE),
min_AUC = min(AUC,na.rm=TRUE),
sd_AUC = sd(AUC,na.rm=TRUE),
NG = sum(NG,na.rm=TRUE),
DB = sum(DB,na.rm=TRUE),
SM = sum(SM,na.rm=TRUE)
)
L_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,l)) + geom_point(position="jitter",size=1) +
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") +
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) +
annotate("text",x=-0.25,y=10,label="NG") +
annotate("text",x=-0.25,y=5,label="DB") +
annotate("text",x=-0.25,y=0,label="SM") +
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10,label=X_stats_X2_RF$NG) +
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=5,label=X_stats_X2_RF$DB) +
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=0,label=X_stats_X2_RF$SM) +
theme_Publication()
K_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,K)) + geom_point(position="jitter",size=1) +
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") +
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(-20,160)) +
annotate("text",x=-0.25,y=-5,label="NG") +
annotate("text",x=-0.25,y=-12.5,label="DB") +
annotate("text",x=-0.25,y=-20,label="SM") +
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-5,label=X_stats_X2_RF$NG) +
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-12.5,label=X_stats_X2_RF$DB) +
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-20,label=X_stats_X2_RF$SM) +
theme_Publication()
R_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,r)) + geom_point(position="jitter",size=1) +
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") +
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) +
annotate("text",x=-0.25,y=.9,label="NG") +
annotate("text",x=-0.25,y=.8,label="DB") +
annotate("text",x=-0.25,y=.7,label="SM") +
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.9,label=X_stats_X2_RF$NG) +
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.8,label=X_stats_X2_RF$DB) +
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.7,label=X_stats_X2_RF$SM) +
theme_Publication()
AUC_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,AUC)) + geom_point(position="jitter",size=1) +
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") +
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) +
annotate("text",x=-0.25,y=11000,label="NG") +
annotate("text",x=-0.25,y=10000,label="DB") +
annotate("text",x=-0.25,y=9000,label="SM") +
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=11000,label=X_stats_X2_RF$NG) +
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10000,label=X_stats_X2_RF$DB) +
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=9000,label=X_stats_X2_RF$SM) +
theme_Publication()
L_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)) + geom_boxplot() +
scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) +
theme_Publication()
K_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)) + geom_boxplot() +
scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) +
theme_Publication()
r_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)) + geom_boxplot() +
scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) +
theme_Publication()
AUC_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)) + geom_boxplot() +
scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(12000,0)) +
theme_Publication()
grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2)
grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2)
#plot the references
#grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4)
#grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2)
#loop for grid arrange 4x3
j <- rep(1,((num_genes_RF)/3)-1)
for(n in 1:length(j)){
j[n+1] <- n*3 + 1
}
#loop for printing each plot
num <- 0
for(m in 1:(round((num_genes_RF)/3)-1)){
num <- j[m]
grid.arrange(p_rf_l[[num]],p_rf_K[[num]],p_rf_r[[num]],p_rf_AUC[[num]],p_rf_l[[num+1]],p_rf_K[[num+1]],p_rf_r[[num+1]],p_rf_AUC[[num+1]],p_rf_l[[num+2]],p_rf_K[[num+2]],p_rf_r[[num+2]],p_rf_AUC[[num+2]],ncol=4,nrow=3)
#grid.arrange(p_rf_l[[364]],p_rf_K[[364]],p_rf_r[[364]],p_rf_l[[365]],p_rf_K[[365]],p_rf_r[[365]],p_rf_l[[366]],p_rf_K[[366]],p_rf_r[[366]],ncol=3,nrow=3)
#p1[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p1[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]]
}
if(num_genes_RF != (num+2)){
total_num = num_genes_RF - (num+2)
if(total_num == 5){
grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3)
grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_AUC[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],p_rf_AUC[[num+7]],Blank,Blank,Blank,Blank,ncol=4,nrow=3)
}
if(total_num == 4){
grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3)
grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_AUC[[num+6]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3)
}
if(total_num == 3){
grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3)
#grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3)
}
if(total_num == 2){
grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],Blank,Blank,Blank,Blank,ncol=4,nrow=3)
#grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3)
}
if(total_num == 1){
grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3)
#grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3)
}
}
dev.off()
#print rank plots for L and K gene interactions
InteractionScores_AdjustMissing <- InteractionScores
InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_L),]$Avg_Zscore_L <- 0.001
InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_K),]$Avg_Zscore_K <- 0.001
InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_r),]$Avg_Zscore_r <- 0.001
InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_AUC),]$Avg_Zscore_AUC <- 0.001
InteractionScores_AdjustMissing$L_Rank <- NA
InteractionScores_AdjustMissing$K_Rank <- NA
InteractionScores_AdjustMissing$r_Rank <- NA
InteractionScores_AdjustMissing$AUC_Rank <- NA
InteractionScores_AdjustMissing$L_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_L)
InteractionScores_AdjustMissing$K_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_K)
InteractionScores_AdjustMissing$r_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_r)
InteractionScores_AdjustMissing$AUC_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_AUC)
#
InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_L),]$Z_lm_L <- 0.001
InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_K),]$Z_lm_K <- 0.001
InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_r),]$Z_lm_r <- 0.001
InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_AUC),]$Z_lm_AUC <- 0.001
InteractionScores_AdjustMissing$L_Rank_lm <- NA
InteractionScores_AdjustMissing$K_Rank_lm <- NA
InteractionScores_AdjustMissing$r_Rank_lm <- NA
InteractionScores_AdjustMissing$AUC_Rank_lm <- NA
InteractionScores_AdjustMissing$L_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_L)
InteractionScores_AdjustMissing$K_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_K)
InteractionScores_AdjustMissing$r_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_r)
InteractionScores_AdjustMissing$AUC_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_AUC)
Rank_L_1SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) +
ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 1,])[1])) +
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -1,])[1])) +
theme_Publication()
Rank_L_2SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) +
ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 2,])[1])) +
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -2,])[1])) +
theme_Publication()
Rank_L_3SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) +
ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 3,])[1])) +
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -3,])[1])) +
theme_Publication()
Rank_K_1SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) +
ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -1,])[1])) +
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 1,])[1])) +
theme_Publication()
Rank_K_2SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) +
ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -2,])[1])) +
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 2,])[1])) +
theme_Publication()
Rank_K_3SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) +
ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -3,])[1])) +
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 3,])[1])) +
theme_Publication()
Rank_L_1SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) +
ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
theme_Publication()
Rank_L_2SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) +
ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
theme_Publication()
Rank_L_3SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) +
ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
theme_Publication()
Rank_K_1SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) +
ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
theme_Publication()
Rank_K_2SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) +
ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
theme_Publication()
Rank_K_3SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) +
ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
theme_Publication()
pdf(paste(outputpath,"RankPlots.pdf",sep=""),width = 18, height = 12, onefile = TRUE)
grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2)
grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2)
dev.off()
Rank_L_1SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) +
ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 1,])[1])) +
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -1,])[1])) +
theme_Publication()
Rank_L_2SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) +
ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 2,])[1])) +
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -2,])[1])) +
theme_Publication()
Rank_L_3SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) +
ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 3,])[1])) +
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -3,])[1])) +
theme_Publication()
Rank_K_1SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) +
ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -1,])[1])) +
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 1,])[1])) +
theme_Publication()
Rank_K_2SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) +
ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -2,])[1])) +
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 2,])[1])) +
theme_Publication()
Rank_K_3SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) +
ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -3,])[1])) +
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 3,])[1])) +
theme_Publication()
Rank_L_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) +
ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
theme_Publication()
Rank_L_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) +
ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
theme_Publication()
Rank_L_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) +
ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
theme_Publication()
Rank_K_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) +
ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
theme_Publication()
Rank_K_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) +
ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
theme_Publication()
Rank_K_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) +
ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
theme_Publication()
pdf(paste(outputpath,"RankPlots_lm.pdf",sep=""),width = 18, height = 12, onefile = TRUE)
grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2)
grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2)
dev.off()
X_NArm <- InteractionScores[!is.na(InteractionScores$Z_lm_L) | !is.na(InteractionScores$Avg_Zscore_L) ,]
#find overlaps
X_NArm$Overlap <- "No Effect"
try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Enhancer Both")
try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Suppressor Both")
try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= 2,]$Overlap <- "Deletion Enhancer lm only")
try(X_NArm[X_NArm$Z_lm_L <= 2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Enhancer Avg Zscore only")
try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= -2,]$Overlap <- "Deletion Suppressor lm only")
try(X_NArm[X_NArm$Z_lm_L >= -2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Suppressor Avg Zscore only")
try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Enhancer lm, Deletion Suppressor Avg Z score")
try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Suppressor lm, Deletion Enhancer Avg Z score")
#get the linear model info and the r-squared value for all CPPs in results 1 vs results 2
get_lm_L <- lm(X_NArm$Z_lm_L~X_NArm$Avg_Zscore_L)
L_lm <- summary(get_lm_L)
get_lm_K <- lm(X_NArm$Z_lm_K~X_NArm$Avg_Zscore_K)
K_lm <- summary(get_lm_K)
get_lm_r <- lm(X_NArm$Z_lm_r~X_NArm$Avg_Zscore_r)
r_lm <- summary(get_lm_r)
get_lm_AUC <- lm(X_NArm$Z_lm_AUC~X_NArm$Avg_Zscore_AUC)
AUC_lm <- summary(get_lm_AUC)
pdf(paste(outputpath,"Avg_Zscore_vs_lm_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE)
print(ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) +
ggtitle("Avg Zscore vs lm L") +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) +
annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right())
print(ggplot(X_NArm,aes(Avg_Zscore_K,Z_lm_K)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) +
ggtitle("Avg Zscore vs lm K") +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) +
annotate("text",x=0,y=0,label = paste("R-squared = ",round(K_lm$r.squared,2))) + theme_Publication_legend_right())
print(ggplot(X_NArm,aes(Avg_Zscore_r,Z_lm_r)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) +
ggtitle("Avg Zscore vs lm r") +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) +
annotate("text",x=0,y=0,label = paste("R-squared = ",round(r_lm$r.squared,2))) + theme_Publication_legend_right())
print(ggplot(X_NArm,aes(Avg_Zscore_AUC,Z_lm_AUC)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) +
ggtitle("Avg Zscore vs lm AUC") +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) +
annotate("text",x=0,y=0,label = paste("R-squared = ",round(AUC_lm$r.squared,2))) + theme_Publication_legend_right())
dev.off()
lm_v_Zscore_L <- ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L,ORF=OrfRep,Gene=Gene,NG=NG,SM=SM,DB=DB)) + geom_point(aes(color=Overlap),shape=3) +
geom_smooth(method = "lm",color=1) + ggtitle("Avg Zscore vs lm L") +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) +
annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right()
pgg <- ggplotly(lm_v_Zscore_L)
#pgg
plotly_path <- paste(getwd(),"/",outputpath,"Avg_Zscore_vs_lm_NA_rm.html",sep="")
saveWidget(pgg, file=plotly_path, selfcontained =TRUE)
X_NArm$L_Rank <- rank(X_NArm$Avg_Zscore_L)
X_NArm$K_Rank <- rank(X_NArm$Avg_Zscore_K)
X_NArm$r_Rank <- rank(X_NArm$Avg_Zscore_r)
X_NArm$AUC_Rank <- rank(X_NArm$Avg_Zscore_AUC)
X_NArm$L_Rank_lm <- rank(X_NArm$Z_lm_L)
X_NArm$K_Rank_lm <- rank(X_NArm$Z_lm_K)
X_NArm$r_Rank_lm <- rank(X_NArm$Z_lm_r)
X_NArm$AUC_Rank_lm <- rank(X_NArm$Z_lm_AUC)
#get the linear model info and the r-squared value for all CPPs in results 1 vs results 2
get_lm_L2 <- lm(X_NArm$L_Rank_lm~X_NArm$L_Rank)
L_lm2 <- summary(get_lm_L2)
get_lm_K2 <- lm(X_NArm$K_Rank_lm~X_NArm$K_Rank)
K_lm2 <- summary(get_lm_K2)
get_lm_r2 <- lm(X_NArm$r_Rank_lm~X_NArm$r_Rank)
r_lm2 <- summary(get_lm_r2)
get_lm_AUC2 <- lm(X_NArm$AUC_Rank_lm~X_NArm$AUC_Rank)
AUC_lm2 <- summary(get_lm_AUC2)
num_genes_NArm2 <- (dim(X_NArm)[1])/2
pdf(paste(outputpath,"Avg_Zscore_vs_lm_ranked_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE)
print(ggplot(X_NArm,aes(L_Rank,L_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) +
ggtitle("Rank Avg Zscore vs lm L") +
annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(L_lm2$r.squared,2))) + theme_Publication_legend_right())
print(ggplot(X_NArm,aes(K_Rank,K_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) +
ggtitle("Rank Avg Zscore vs lm K") +
annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(K_lm2$r.squared,2))) + theme_Publication_legend_right())
print(ggplot(X_NArm,aes(r_Rank,r_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) +
ggtitle("Rank Avg Zscore vs lm r") +
annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(r_lm2$r.squared,2))) + theme_Publication_legend_right())
print(ggplot(X_NArm,aes(AUC_Rank,AUC_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) +
ggtitle("Rank of Avg Zscore vs lm AUC") +
annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(AUC_lm2$r.squared,2))) + theme_Publication_legend_right())
dev.off()
Rank_L_1SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) +
ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 1,])[1])) +
annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -1,])[1])) +
theme_Publication()
Rank_L_2SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) +
ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 2,])[1])) +
annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -2,])[1])) +
theme_Publication()
Rank_L_3SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) +
ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 3,])[1])) +
annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -3,])[1])) +
theme_Publication()
Rank_K_1SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) +
ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -1,])[1])) +
annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 1,])[1])) +
theme_Publication()
Rank_K_2SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) +
ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -2,])[1])) +
annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 2,])[1])) +
theme_Publication()
Rank_K_3SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) +
ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -3,])[1])) +
annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 3,])[1])) +
theme_Publication()
Rank_L_1SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) +
ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
theme_Publication()
Rank_L_2SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) +
ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
theme_Publication()
Rank_L_3SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) +
ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
theme_Publication()
Rank_K_1SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) +
ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
theme_Publication()
Rank_K_2SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) +
ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
theme_Publication()
Rank_K_3SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) +
ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
theme_Publication()
pdf(paste(outputpath,"RankPlots_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE)
grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2)
grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2)
dev.off()
Rank_L_1SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) +
ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 1,])[1])) +
annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -1,])[1])) +
theme_Publication()
Rank_L_2SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) +
ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 2,])[1])) +
annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -2,])[1])) +
theme_Publication()
Rank_L_3SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) +
ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 3,])[1])) +
annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -3,])[1])) +
theme_Publication()
Rank_K_1SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) +
ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -1,])[1])) +
annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 1,])[1])) +
theme_Publication()
Rank_K_2SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) +
ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -2,])[1])) +
annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 2,])[1])) +
theme_Publication()
Rank_K_3SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) +
ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -3,])[1])) +
annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 3,])[1])) +
theme_Publication()
Rank_L_1SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) +
ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
theme_Publication()
Rank_L_2SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) +
ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
theme_Publication()
Rank_L_3SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) +
ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
theme_Publication()
Rank_K_1SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) +
ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
theme_Publication()
Rank_K_2SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) +
ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
theme_Publication()
Rank_K_3SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) +
ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
theme_Publication()
pdf(paste(outputpath,"RankPlots_lm_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE)
grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2)
grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2)
dev.off()
}
#get the linear model info and the r-squared value for all CPPs in results 1 vs results 2
get_lm_1 <- lm(X_NArm$Z_lm_K~X_NArm$Z_lm_L)
L_lm_1 <- summary(get_lm_1)
get_lm_2 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_L)
L_lm_2 <- summary(get_lm_2)
get_lm_3 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_L)
L_lm_3 <- summary(get_lm_3)
get_lm_4 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_K)
L_lm_4 <- summary(get_lm_4)
get_lm_5 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_K)
L_lm_5 <- summary(get_lm_5)
get_lm_6 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_r)
L_lm_6 <- summary(get_lm_6)
pdf(file=paste(outputpath,"Correlation_CPPs.pdf",sep=""),width = 10, height = 7, onefile = TRUE)
ggplot(X_NArm,aes(Z_lm_L,Z_lm_K)) + geom_point(shape=3,color="gray70") +
geom_smooth(method="lm",color="tomato3") +
ggtitle("Interaction L vs. Interaction K") +
xlab("z-score L") + ylab("z-score K") +
annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_1$r.squared,3))) +
theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18))
ggplot(X_NArm,aes(Z_lm_L,Z_lm_r)) + geom_point(shape=3,color="gray70") +
geom_smooth(method="lm",color="tomato3") +
ggtitle("Interaction L vs. Interaction r") +
xlab("z-score L") + ylab("z-score r") +
annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_2$r.squared,3))) +
theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18))
ggplot(X_NArm,aes(Z_lm_L,Z_lm_AUC)) + geom_point(shape=3,color="gray70") +
geom_smooth(method="lm",color="tomato3") +
ggtitle("Interaction L vs. Interaction AUC") +
xlab("z-score L") + ylab("z-score AUC") +
annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_3$r.squared,3))) +
theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18))
ggplot(X_NArm,aes(Z_lm_K,Z_lm_r)) + geom_point(shape=3,color="gray70") +
geom_smooth(method="lm",color="tomato3") +
ggtitle("Interaction K vs. Interaction r") +
xlab("z-score K") + ylab("z-score r") +
annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_4$r.squared,3))) +
theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18))
ggplot(X_NArm,aes(Z_lm_K,Z_lm_AUC)) + geom_point(shape=3,color="gray70") +
geom_smooth(method="lm",color="tomato3") +
ggtitle("Interaction K vs. Interaction AUC") +
xlab("z-score K") + ylab("z-score AUC") +
annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_5$r.squared,3))) +
theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18))
ggplot(X_NArm,aes(Z_lm_r,Z_lm_AUC)) + geom_point(shape=3,color="gray70") +
geom_smooth(method="lm",color="tomato3") +
ggtitle("Interaction r vs. Interaction AUC") +
xlab("z-score r") + ylab("z-score AUC") +
annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_6$r.squared,3))) +
theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18))
InteractionScores_RF2 <- InteractionScores_RF[!is.na(InteractionScores_RF$Z_lm_L),]
ggplot(X_NArm,aes(Z_lm_L,Z_lm_K)) + geom_point(shape=3,color="gray70") +
geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_K),color="cyan") +
ggtitle("Interaction L vs. Interaction K") +
xlab("z-score L") + ylab("z-score K") +
annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_1$r.squared,3))) +
theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18))
ggplot(X_NArm,aes(Z_lm_L,Z_lm_r)) + geom_point(shape=3,color="gray70") +
geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_r),color="cyan") +
ggtitle("Interaction L vs. Interaction r") +
xlab("z-score L") + ylab("z-score r") +
annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_2$r.squared,3))) +
theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18))
ggplot(X_NArm,aes(Z_lm_L,Z_lm_AUC)) + geom_point(shape=3,color="gray70") +
geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_AUC),color="cyan") +
ggtitle("Interaction L vs. Interaction AUC") +
xlab("z-score L") + ylab("z-score AUC") +
annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_3$r.squared,3))) +
theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18))
ggplot(X_NArm,aes(Z_lm_K,Z_lm_r)) + geom_point(shape=3,color="gray70") +
geom_point(data=InteractionScores_RF2,aes(Z_lm_K,Z_lm_r),color="cyan") +
ggtitle("Interaction K vs. Interaction r") +
xlab("z-score K") + ylab("z-score r") +
annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_4$r.squared,3))) +
theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18))
ggplot(X_NArm,aes(Z_lm_K,Z_lm_AUC)) + geom_point(shape=3,color="gray70") +
geom_point(data=InteractionScores_RF2,aes(Z_lm_K,Z_lm_AUC),color="cyan") +
ggtitle("Interaction K vs. Interaction AUC") +
xlab("z-score K") + ylab("z-score AUC") +
annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_5$r.squared,3))) +
theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18))
ggplot(X_NArm,aes(Z_lm_r,Z_lm_AUC)) + geom_point(shape=3,color="gray70") +
geom_point(data=InteractionScores_RF2,aes(Z_lm_r,Z_lm_AUC),color="cyan") +
ggtitle("Interaction r vs. Interaction AUC") +
xlab("z-score r") + ylab("z-score AUC") +
annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_6$r.squared,3))) +
theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18))
dev.off()
#write.csv(Labels,file=paste("../Code/Parameters.csv"),row.names = FALSE)
timestamp()
#BoneYard***********************************************
#I'm thinking this parameter needs to be save somewhere "permanent' for the record so outputs can be reproduced.
#take this out of the Arguments. In Matlab I could for future in .mat file. Maybe I could save the SD Args[2] as part of the StudyInfo.txt.
#Corruptable but better than nothing.
#if(is.na(Args[2])){
# std=3
#}else {
# std= Arg[2]
#Delta_Background_sdFactor <- 2 #Args[3]
#DelBGFactr <- as.numeric(Delta_Background_sdFactor)
#}