#Based on InteractionTemplate.R which is based on Sean Santose's Interaction_V5 script. #Adapt SS For Structured Data storage but using command line scripts ###Set up the required libraries, call required plot theme elements and set up the command line arguments library("ggplot2") library("plyr") library("extrafont") library("gridExtra") library("gplots") library("RColorBrewer") library("stringr") #library("gdata") library(plotly) library(htmlwidgets) Args <- commandArgs(TRUE) input_file <- Args[1] #"!!Results_17_0827_yor1null-rpl12anull misLabeledAsFrom MI 17_0919_yor1-curated.txt" #Args[1] #Arg 1 #"!!ResultsStd_JS 19_1224_HLEG_P53.txt" is the !!results ... .txt #Path to Output Directory W=getwd() #R is F'd up, Can't use, Any legitamate platfold could build out dirs from this outDir <- "ZScores/" #"Args[2] #paste0(W,"/ZScores/") subDir <- outDir #Args[2] if (file.exists(subDir)){ outputpath <- subDir } else { dir.create(file.path(subDir)) } if (file.exists(paste(subDir,"QC/",sep=""))){ outputpath_QC <- paste(subDir,"QC/",sep="") } else { dir.create(file.path(paste(subDir,"QC/",sep=""))) outputpath_QC <- paste(subDir,"QC/",sep="") } #define the output path (formerly the second argument from Rscript) outputpath <- outDir #Set Args[2] the Background contamination noise filter as a function of standard deviation #Sean recommends 3 or 5 SD factor. #Capture Exp_ number,use it to Save Args[2]{std}to Labels field and then Write to Labels to studyInfo.txt for future reference Labels <- read.csv(file= "../Code/StudyInfo.csv",stringsAsFactors = FALSE) #,sep= ",") print("Be sure to include Argument 2 the Bacground noise filter standard deviation i.e., 3 or 5 per Sean") std= as.numeric(Args[2]) expNumber<- as.numeric(sub("^.*?(\\d+)$", "\\1", getwd())) Labels[expNumber,3]= as.numeric(std) Delta_Background_sdFactor <- std delBGFactor <- as.numeric(Delta_Background_sdFactor) #Write Background SD value to studyInfo.txt file #write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) print('ln 50 write StudyInfo.csv ') #write.table(Labels,file=paste(outputpath,"StudyInfo.txt"),sep = "\t",row.names = FALSE) #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ #++++++BEGIN USER DATA SELECTION SECTION+++++++++++++++++++++++++++++++++++++++++++++++++ #read in the data X <- read.delim(input_file,skip=2,as.is=T,row.names=1,strip.white=TRUE) X <- X[!(X[[1]]%in%c("","Scan")),] #X <- X[!(X[[1]]%in%c(61:76)),] #Remove dAmp plates which are Scans 61 thru 76 #X <- X[which(X$Specifics == "WT"),] X_length <- length(X[1,]) X_end <- length(X[1,]) - 2 X <- X[,c(1:46,X_end:X_length)] #use numeric data to perform operations X$Col <- as.numeric(X$Col) X$Row <- as.numeric(X$Row) X$l <- as.numeric(X$l) X$K <- as.numeric(X$K) X$r <- as.numeric(X$r) X$Scan <- as.numeric(X$Scan) X$AUC <- as.numeric(X$AUC) X$LstBackgrd <- as.numeric(X$LstBackgrd) X$X1stBackgrd <- as.numeric(X$X1stBackgrd) #set the OrfRep to YDL227C for the ref data X[X$ORF == "YDL227C",]$OrfRep <- "YDL227C" #Sean removes the Doxycyclin at 0.0ug.mL so that only the Oligomycin series with Doxycyclin of 0.12ug/mL are used. #That is the first DM plates are removed from the data set with the following. X <- X[X$Conc1 != "0ug/mL",] #This occurs only for Exp1 and Exp2 and so doesn't have any effect on Exp3&4 #Mert placed the"bad_spot" text in the ORF col. for particular spots in the RF1 and RF2 plates. #This code removes those spots from the data set used for the interaction analysis. Dr.Hartman feels that these donot effect Zscores significantly and so "non-currated" files were used. #try(X <- X[X$ORF != "bad_spot",]) #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++= #get total number of drug concentrations Total_Conc_Nums <- length(unique(X$Conc)) #function to ID numbers in string with characters+numbers (ie to get numeric drug conc) numextract <- function(string){ str_extract(string, "\\-*\\d+\\.*\\d*") } #generate a new column with the numeric drug concs X$Conc_Num <- as.numeric(numextract(X$Conc)) #Generate new column with the numeric drug concs as factors starting at 0 for the graphing later X$Conc_Num_Factor <- as.numeric(as.factor(X$Conc_Num)) - 1 #Get the max factor for concentration MAX_CONC <- max(X$Conc_Num_Factor) #if treating numbers not as factors uncomment next line and comment out previous line #MAX_CONC <- max(X$Conc_Num) #remove wells with problems for making graphs and to not include in summary statistics X <- X[X$Gene != "BLANK",] X <- X[X$Gene != "Blank",] X <- X[X$ORF != "Blank",] X <- X[X$Gene != "blank",] #X <- X[X$Gene != "HO",] Xbu= X #Inserted to use SGDgenelist to update orfs and replace empty geneName cells with ORF name (adapted from Sean's Merge script). This is to 'fix' the naming for everything that follows (REMc, Heatmaps ... et.al) rather than do it piece meal later #Sean's Match Script( which was adapted here) was fixed 2022_0608 so as not to write over the RF1&RF2 geneNames which caused a variance with his code results #in the Z_lm_L,K,r&AUC output values. Values correlated well but were off by a multiplier factor. SGDgeneList= "../Code/SGD_features.tab" genes = data.frame(read.delim(file=SGDgeneList,quote="",header=FALSE,colClasses = c(rep("NULL",3), rep("character", 2), rep("NULL", 11)))) for(i in 1:length(X[,14])){ ii= as.numeric(i) line_num = match(X[ii,14],genes[,1],nomatch=1) OrfRepColNum= as.numeric(match('OrfRep',names(X))) if(X[ii,OrfRepColNum]!= "YDL227C"){ X[ii,15] = genes[line_num,2] } if((X[ii,15] == "")||(X[ii,15] == "OCT1")){ X[ii,15] = X[ii,OrfRepColNum] } } Xblankreplace= X #X= Xbu #for restore testing restore X if geneName 'Match' routine needs changing #Remove dAmPs ******************************* DAmPs_List <- "../Code/22_0602_Remy_DAmPsList.txt" Damps <- read.delim(DAmPs_List,header=F) X <- X[!(X$ORF %in% Damps$V1),] #fix this to Damps[,1] XafterDampsRM=X #Backup for debugging especially when Rstudio goes crazy out of control # *********** #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ #++++++END USER DATA SELECTION+++++++++++++++++++++++++++++++++++++++++++++++++ #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ print("ln137 End of User Section including blank gene writeOver") #++++Begin Graphics Boiler Plate Section+++++++++++++++++++++++++++++++++++++++ #theme elements for plots theme_Publication <- function(base_size=14, base_family="sans") { library(grid) library(ggthemes) (theme_foundation(base_size=base_size, base_family=base_family) + theme(plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5), text = element_text(), panel.background = element_rect(colour = NA), plot.background = element_rect(colour = NA), panel.border = element_rect(colour = NA), axis.title = element_text(face = "bold",size = rel(1)), axis.title.y = element_text(angle=90,vjust =2), axis.title.x = element_text(vjust = -0.2), axis.text = element_text(), axis.line = element_line(colour="black"), axis.ticks = element_line(), panel.grid.major = element_line(colour="#f0f0f0"), panel.grid.minor = element_blank(), legend.key = element_rect(colour = NA), legend.position = "bottom", legend.direction = "horizontal", legend.key.size= unit(0.2, "cm"), legend.spacing = unit(0, "cm"), legend.title = element_text(face="italic"), plot.margin=unit(c(10,5,5,5),"mm"), strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"), strip.text = element_text(face="bold") )) } scale_fill_Publication <- function(...){ library(scales) discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) } scale_colour_Publication <- function(...){ discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) } theme_Publication_legend_right <- function(base_size=14, base_family="sans") { (theme_foundation(base_size=base_size, base_family=base_family) + theme(plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5), text = element_text(), panel.background = element_rect(colour = NA), plot.background = element_rect(colour = NA), panel.border = element_rect(colour = NA), axis.title = element_text(face = "bold",size = rel(1)), axis.title.y = element_text(angle=90,vjust =2), axis.title.x = element_text(vjust = -0.2), axis.text = element_text(), axis.line = element_line(colour="black"), axis.ticks = element_line(), panel.grid.major = element_line(colour="#f0f0f0"), panel.grid.minor = element_blank(), legend.key = element_rect(colour = NA), legend.position = "right", legend.direction = "vertical", legend.key.size= unit(0.5, "cm"), legend.spacing = unit(0, "cm"), legend.title = element_text(face="italic"), plot.margin=unit(c(10,5,5,5),"mm"), strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"), strip.text = element_text(face="bold") )) } scale_fill_Publication <- function(...){ discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) } scale_colour_Publication <- function(...){ discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) } #print timestamp for initial time the code starts timestamp() #+++++BEGIN QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++ ###Part 2 - Quality control #print quality control graphs for each dataset before removing data due to contamination #and before adjusting missing data to max theoretical values #plate analysis plot #plate analysis is a quality check to identify plate effects containing anomalies Plate_Analysis_L <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication() Plate_Analysis_K <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication() Plate_Analysis_r <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_Publication() Plate_Analysis_AUC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication() Plate_Analysis_L_Box <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication() Plate_Analysis_K_Box <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication() Plate_Analysis_r_Box <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_Publication() Plate_Analysis_AUC_Box <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication() #quality control - values with a high delta background likely have heavy contamination #check the frequency of these values #report the L and K values of these spots #report the number to be removed based on the Delta_Background_Tolerance X$Delta_Backgrd <- X$LstBackgrd - X$X1stBackgrd #raw l vs K before QC Raw_l_vs_K_beforeQC <- ggplot(X,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + ggtitle("Raw L vs K before QC") + theme_Publication_legend_right() pdf(paste(outputpath_QC,"Raw_L_vs_K_beforeQC.pdf",sep=""),width = 12,height = 8) Raw_l_vs_K_beforeQC dev.off() pgg <- ggplotly(Raw_l_vs_K_beforeQC) #pgg plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_beforeQC.html",sep="") saveWidget(pgg, file=plotly_path, selfcontained =TRUE) #set delta background tolerance based on 3 sds from the mean delta background Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(delBGFactor*sd(X$Delta_Backgrd)) #Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(3*sd(X$Delta_Backgrd)) print(paste("Delta_Background_Tolerance is",Delta_Background_Tolerance,sep=" ")) Plate_Analysis_Delta_Backgrd <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2,position="jitter") + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication() Plate_Analysis_Delta_Backgrd_Box <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication() X_Delta_Backgrd_above_Tolerance <- X[X$Delta_Backgrd >= Delta_Background_Tolerance,] X_Delta_Backgrd_above_Tolerance_K_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$K,na.rm = TRUE))/2 X_Delta_Backgrd_above_Tolerance_L_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$l,na.rm = TRUE))/2 X_Delta_Backgrd_above_Tolerance_toRemove <- dim(X_Delta_Backgrd_above_Tolerance)[1] X_Delta_Backgrd_above_Tolerance_L_vs_K <- ggplot(X_Delta_Backgrd_above_Tolerance,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + ggtitle(paste("Raw L vs K for strains above delta background threshold of",Delta_Background_Tolerance,"or above")) + annotate("text",x=X_Delta_Backgrd_above_Tolerance_L_halfmedian,y=X_Delta_Backgrd_above_Tolerance_K_halfmedian, label = paste("Strains above delta background tolerance = ",X_Delta_Backgrd_above_Tolerance_toRemove)) + theme_Publication_legend_right() pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.pdf",sep=""),width = 12,height = 8) X_Delta_Backgrd_above_Tolerance_L_vs_K dev.off() pgg <- ggplotly(X_Delta_Backgrd_above_Tolerance_L_vs_K) #pgg plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.html",sep="") saveWidget(pgg, file=plotly_path, selfcontained =TRUE) #frequency plot for all data vs. the delta_background DeltaBackground_Frequency_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_density() + ggtitle("Density plot for Delta Background by Conc All Data") + theme_Publication_legend_right() #bar plot for all data vs. the delta_background DeltaBackground_Bar_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_bar() + ggtitle("Bar plot for Delta Background by Conc All Data") + theme_Publication_legend_right() pdf(file = paste(outputpath_QC,"Frequency_Delta_Background.pdf",sep=""),width = 12,height = 8) print(DeltaBackground_Frequency_Plot) print(DeltaBackground_Bar_Plot) dev.off() #Need to identify missing data, and differentiate between this data and removed data so the removed data can get set to NA and the missing data can get set to max theoretical values #1 for missing data, 0 for non missing data #Use "NG" for NoGrowth rather than "missing" X$NG <- 0 try(X[X$l == 0 & !is.na(X$l),]$NG <- 1) #1 for removed data, 0 non removed data #Use DB to identify number of genes removed due to the DeltaBackground Threshold rather than "Removed" X$DB <- 0 try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$DB <- 1) #replace the CPPs for l, r, AUC and K (must be last!) for removed data try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$l <- NA) try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$r <- NA) try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$AUC <- NA) try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$K <- NA) Plate_Analysis_L_afterQC <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() Plate_Analysis_K_afterQC <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() Plate_Analysis_r_afterQC <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() Plate_Analysis_AUC_afterQC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() Plate_Analysis_Delta_Backgrd_afterQC <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() Plate_Analysis_L_Box_afterQC <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() Plate_Analysis_K_Box_afterQC <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() Plate_Analysis_r_Box_afterQC <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() Plate_Analysis_AUC_Box_afterQC <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() Plate_Analysis_Delta_Backgrd_Box_afterQC <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() #print the plate analysis data before and after QC pdf(file=paste(outputpath_QC,"Plate_Analysis.pdf",sep=""),width = 14,height=9) Plate_Analysis_L Plate_Analysis_L_afterQC Plate_Analysis_K Plate_Analysis_K_afterQC Plate_Analysis_r Plate_Analysis_r_afterQC Plate_Analysis_AUC Plate_Analysis_AUC_afterQC Plate_Analysis_Delta_Backgrd Plate_Analysis_Delta_Backgrd_afterQC dev.off() #print the plate analysis data before and after QC pdf(file=paste(outputpath_QC,"Plate_Analysis_Boxplots.pdf",sep=""),width = 18,height=9) Plate_Analysis_L_Box Plate_Analysis_L_Box_afterQC Plate_Analysis_K_Box Plate_Analysis_K_Box_afterQC Plate_Analysis_r_Box Plate_Analysis_r_Box_afterQC Plate_Analysis_AUC_Box Plate_Analysis_AUC_Box_afterQC Plate_Analysis_Delta_Backgrd_Box Plate_Analysis_Delta_Backgrd_Box_afterQC dev.off() #remove the zero values and print plate analysis X_noZero <- X[which(X$l > 0),] Plate_Analysis_L_afterQC_Z <- ggplot(X_noZero,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() Plate_Analysis_K_afterQC_Z <- ggplot(X_noZero,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() Plate_Analysis_r_afterQC_Z <- ggplot(X_noZero,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() Plate_Analysis_AUC_afterQC_Z <- ggplot(X_noZero,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() Plate_Analysis_Delta_Backgrd_afterQC_Z <- ggplot(X_noZero,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() Plate_Analysis_L_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() Plate_Analysis_K_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() Plate_Analysis_r_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() Plate_Analysis_AUC_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() Plate_Analysis_Delta_Backgrd_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() #print the plate analysis data before and after QC pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros.pdf",sep=""),width = 14,height=9) Plate_Analysis_L_afterQC_Z Plate_Analysis_K_afterQC_Z Plate_Analysis_r_afterQC_Z Plate_Analysis_AUC_afterQC_Z Plate_Analysis_Delta_Backgrd_afterQC_Z dev.off() #print the plate analysis data before and after QC pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros_Boxplots.pdf",sep=""),width = 18,height=9) Plate_Analysis_L_Box_afterQC_Z Plate_Analysis_K_Box_afterQC_Z Plate_Analysis_r_Box_afterQC_Z Plate_Analysis_AUC_Box_afterQC_Z Plate_Analysis_Delta_Backgrd_Box_afterQC_Z dev.off() #remove dataset with zeros removed rm(X_noZero) #X_test_missing_and_removed <- X[X$Removed == 1,] #calculate summary statistics for all strains, including both background and the deletions X_stats_ALL <- ddply(X, c("Conc_Num","Conc_Num_Factor"), summarise, N = (length(l)), mean_L = mean(l,na.rm=TRUE), median_L = median(l,na.rm=TRUE), max_L = max(l,na.rm=TRUE), min_L = min(l,na.rm=TRUE), sd_L = sd(l,na.rm=TRUE), se_L = sd_L / sqrt(N-1), mean_K = mean(K,na.rm=TRUE), median_K = median(K,na.rm=TRUE), max_K = max(K,na.rm=TRUE), min_K = min(K,na.rm=TRUE), sd_K = sd(K,na.rm=TRUE), se_K = sd_K / sqrt(N-1), mean_r = mean(r,na.rm=TRUE), median_r = median(r,na.rm=TRUE), max_r = max(r,na.rm=TRUE), min_r = min(r,na.rm=TRUE), sd_r = sd(r,na.rm=TRUE), se_r = sd_r / sqrt(N-1), mean_AUC = mean(AUC,na.rm=TRUE), median_AUC = median(AUC,na.rm=TRUE), max_AUC = max(AUC,na.rm=TRUE), min_AUC = min(AUC,na.rm=TRUE), sd_AUC = sd(AUC,na.rm=TRUE), se_AUC = sd_AUC / sqrt(N-1) ) #print(X_stats_ALL_L) write.csv(X_stats_ALL,file=paste(outputpath,"SummaryStats_ALLSTRAINS.csv"),row.names = FALSE) #+++++END QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++ ##### Part 3 - Generate summary statistics and calculate the max theoretical L value ##### Calculate the Z score at each drug conc for each deletion strain #get the background strains - can be modified to take another argument but for most screens will just be YDL227C Background_Strains <- c("YDL227C") #first part of loop will go through for each background strain #most cases there will only be one YDL227C for(s in Background_Strains){ X_Background <- X[X$OrfRep == s,] #if there's missing data for the background strains set these values to NA so the 0 values aren't included in summary statistics #we may want to consider in some cases giving the max high value to L depending on the data type if(table(X_Background$l)[1] == 0){ X_Background[X_Background$l == 0,]$l <- NA X_Background[X_Background$K == 0,]$K <- NA X_Background[X_Background$r == 0,]$r <- NA X_Background[X_Background$AUC == 0,]$AUC <- NA } X_Background <- X_Background[!is.na(X_Background$l),] #get summary stats for L, K, R, AUC X_stats_BY_L <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, N = (length(l)), mean = mean(l,na.rm=TRUE), median = median(l,na.rm=TRUE), max = max(l,na.rm=TRUE), min = min(l,na.rm=TRUE), sd = sd(l,na.rm=TRUE), se = sd / sqrt(N-1) ) print(X_stats_BY_L) X1_SD <- max(X_stats_BY_L$sd) X_stats_BY_K <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, N = (length(K)), mean = mean(K,na.rm=TRUE), median = median(K,na.rm=TRUE), max = max(K,na.rm=TRUE), min = min(K,na.rm=TRUE), sd = sd(K,na.rm=TRUE), se = sd / sqrt(N-1) ) X1_SD_K <- max(X_stats_BY_K$sd) X_stats_BY_r <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, N = length(r), mean = mean(r,na.rm=TRUE), median = median(r,na.rm=TRUE), max = max(r,na.rm=TRUE), min = min(r,na.rm=TRUE), sd = sd(r,na.rm=TRUE), se = sd / sqrt(N-1) ) X1_SD_r <- max(X_stats_BY_r$sd) X_stats_BY_AUC <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, N = length(AUC), mean = mean(AUC,na.rm=TRUE), median = median(AUC,na.rm=TRUE), max = max(AUC,na.rm=TRUE), min = min(AUC,na.rm=TRUE), sd = sd(AUC,na.rm=TRUE), se = sd / sqrt(N-1) ) X1_SD_AUC <- max(X_stats_BY_AUC$sd) X_stats_BY <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, N = (length(l)), mean_L = mean(l,na.rm=TRUE), median_L = median(l,na.rm=TRUE), max_L = max(l,na.rm=TRUE), min_L = min(l,na.rm=TRUE), sd_L = sd(l,na.rm=TRUE), se_L = sd_L / sqrt(N-1), mean_K = mean(K,na.rm=TRUE), median_K = median(K,na.rm=TRUE), max_K = max(K,na.rm=TRUE), min_K = min(K,na.rm=TRUE), sd_K = sd(K,na.rm=TRUE), se_K = sd_K / sqrt(N-1), mean_r = mean(r,na.rm=TRUE), median_r = median(r,na.rm=TRUE), max_r = max(r,na.rm=TRUE), min_r = min(r,na.rm=TRUE), sd_r = sd(r,na.rm=TRUE), se_r = sd_r / sqrt(N-1), mean_AUC = mean(AUC,na.rm=TRUE), median_AUC = median(AUC,na.rm=TRUE), max_AUC = max(AUC,na.rm=TRUE), min_L = min(AUC,na.rm=TRUE), sd_AUC = sd(AUC,na.rm=TRUE), se_AUC = sd_AUC / sqrt(N-1) ) write.csv(X_stats_BY,file=paste(outputpath,"SummaryStats_BackgroundStrains.csv"),row.names=FALSE) #calculate the max theoretical L values #only look for max values when K is within 2SD of the ref strain for(q in unique(X$Conc_Num_Factor)){ if(q == 0){ X_within_2SD_K <- X[X$Conc_Num_Factor == q,] X_within_2SD_K <- X_within_2SD_K[!is.na(X_within_2SD_K$l),] X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) ,] X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] X_outside_2SD_K <- X[X$Conc_Num_Factor == q,] X_outside_2SD_K <- X_outside_2SD_K[!is.na(X_outside_2SD_K$l),] #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] #X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] } if(q > 0){ X_within_2SD_K_temp <- X[X$Conc_Num_Factor == q,] X_within_2SD_K_temp <- X_within_2SD_K_temp[!is.na(X_within_2SD_K_temp$l),] X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])),] X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])),] X_within_2SD_K <- rbind(X_within_2SD_K,X_within_2SD_K_temp) X_outside_2SD_K_temp <- X[X$Conc_Num_Factor == q,] X_outside_2SD_K_temp <- X_outside_2SD_K_temp[!is.na(X_outside_2SD_K_temp$l),] #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] #X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] X_outside_2SD_K <- rbind(X_outside_2SD_K,X_outside_2SD_K_temp) } } X_stats_BY_L_within_2SD_K <- ddply(X_within_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise, N = (length(l)), mean = mean(l), median = median(l), max = max(l,na.rm=TRUE), min = min(l,na.rm=TRUE), sd = sd(l), se = sd / sqrt(N-1), z_max = (max-mean)/sd ) print(X_stats_BY_L_within_2SD_K) X1_SD_within_2SD_K <- max(X_stats_BY_L_within_2SD_K$sd) write.csv(X_stats_BY_L_within_2SD_K,file=paste(outputpath_QC,"Max_Observed_L_Vals_for_spots_within_2SD_K.csv",sep=""),row.names=FALSE) X_stats_BY_L_outside_2SD_K <- ddply(X_outside_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise, N = (length(l)), mean = mean(l), median = median(l), max = max(l,na.rm=TRUE), min = min(l,na.rm=TRUE), sd = sd(l), se = sd / sqrt(N-1) ) print(X_stats_BY_L_outside_2SD_K) X1_SD_outside_2SD_K <- max(X_stats_BY_L_outside_2SD_K$sd) #X1_SD_outside_2SD_K <- X[X$l %in% X1_SD_within_2SD_K$l,] Outside_2SD_K_L_vs_K <- ggplot(X_outside_2SD_K,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + ggtitle("Raw L vs K for strains falling outside 2SD of the K mean at each conc") + theme_Publication_legend_right() pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8) print(Outside_2SD_K_L_vs_K) dev.off() pgg <- ggplotly(Outside_2SD_K_L_vs_K) plotly_path <- paste(getwd(),"/",outputpath_QC,"RawL_vs_K_for_strains_outside_2SD_K.html",sep="") saveWidget(pgg, file=plotly_path, selfcontained =TRUE) Outside_2SD_K_delta_background_vs_K <- ggplot(X_outside_2SD_K,aes(Delta_Backgrd,K,color=as.factor(Conc_Num))) + geom_point(aes(l=l,ORF=ORF,Gene=Gene),shape=3,position="jitter") + ggtitle("DeltaBackground vs K for strains falling outside 2SD of the K mean at each conc") + theme_Publication_legend_right() pdf(paste(outputpath_QC,"DeltaBackground_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8) Outside_2SD_K_delta_background_vs_K dev.off() pgg <- ggplotly(Outside_2SD_K_delta_background_vs_K) #pgg plotly_path <- paste(getwd(),"/",outputpath_QC,"DeltaBackground_vs_K_for_strains_outside_2SD_K.html",sep="") saveWidget(pgg, file=plotly_path, selfcontained =TRUE) #get the background strain mean values at the no drug conc to calculate shift Background_L <- X_stats_BY_L$mean[1] Background_K <- X_stats_BY_K$mean[1] Background_r <- X_stats_BY_r$mean[1] Background_AUC <- X_stats_BY_AUC$mean[1] #create empty plots for plotting element p_l <- ggplot() p_K <- ggplot() p_r <- ggplot() p_AUC <- ggplot() p_rf_l <- ggplot() p_rf_K <- ggplot() p_rf_r <- ggplot() p_rf_AUC <- ggplot() #get only the deletion strains X2 <- X X2 <- X2[X2$OrfRep != "YDL227C",] #if set to max theoretical value, add a 1 to SM, if not, leave as 0 #SM = Set to Max X2$SM <- 0 #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min for(i in 1:length(unique(X2$Conc_Num))){ Concentration <- unique(X2$Conc_Num)[i] X2_temp <- X2[X2$Conc_Num == Concentration,] if(Concentration == 0){ X2_new <- X2_temp print(paste("Check loop order, conc =",Concentration,sep=" ")) } if(Concentration > 0){ try(X2_temp[X2_temp$l == 0 & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$SM <- 1) try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) #X2_temp[X2_temp$K == 0,]$K <- X_stats_ALL_K$max[i] #X2_temp[X2_temp$r == 0,]$r <- X_stats_ALL_r$max[i] #X2_temp[X2_temp$AUC == 0,]$AUC <- X_stats_ALL_AUC$max[i] print(paste("Check loop order, conc =",Concentration,sep=" ")) X2_new <- rbind(X2_new,X2_temp) } } X2 <- X2_new #get only the RF strains X2_RF <- X X2_RF <- X2_RF[X2_RF$OrfRep == "YDL227C",] #if set to max theoretical value, add a 1 to SM, if not, leave as 0 #SM = Set to Max X2_RF$SM <- 0 #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min for(i in 1:length(unique(X2_RF$Conc_Num))){ Concentration <- unique(X2_RF$Conc_Num)[i] X2_RF_temp <- X2_RF[X2_RF$Conc_Num == Concentration,] if(Concentration == 0){ X2_RF_new <- X2_RF_temp print(paste("Check loop order, conc =",Concentration,sep=" ")) } if(Concentration > 0){ try(X2_RF_temp[X2_RF_temp$l == 0 & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$SM <- 1) try(X2_RF_temp[X2_RF_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) print(paste("Check loop order, if error, refs have no L values outside theoretical max L, for REFs, conc =",Concentration,sep=" ")) X2_RF_new <- rbind(X2_RF_new,X2_RF_temp) } } X2_RF <- X2_RF_new #######Part 4 Get the RF Z score values #Change the OrfRep Column to include the RF strain, the Gene name and the Num. so each RF gets its own score X2_RF$OrfRep <- paste(X2_RF$OrfRep,X2_RF$Gene,X2_RF$Num.,sep="_") num_genes_RF <- length(unique(X2_RF$OrfRep)) print(num_genes_RF) #create the output data.frame containing columns for each RF strain InteractionScores_RF <- unique(X2_RF["OrfRep"]) #InteractionScores_RF$Gene <- unique(X2$Gene) InteractionScores_RF$Gene <- NA InteractionScores_RF$Raw_Shift_L <- NA InteractionScores_RF$Z_Shift_L <- NA InteractionScores_RF$lm_Score_L <- NA InteractionScores_RF$Z_lm_L <- NA InteractionScores_RF$R_Squared_L <- NA InteractionScores_RF$Sum_Z_Score_L <- NA InteractionScores_RF$Avg_Zscore_L <- NA InteractionScores_RF$Raw_Shift_K <- NA InteractionScores_RF$Z_Shift_K <- NA InteractionScores_RF$lm_Score_K <- NA InteractionScores_RF$Z_lm_K <- NA InteractionScores_RF$R_Squared_K <- NA InteractionScores_RF$Sum_Z_Score_K <- NA InteractionScores_RF$Avg_Zscore_K <- NA InteractionScores_RF$Raw_Shift_r <- NA InteractionScores_RF$Z_Shift_r <- NA InteractionScores_RF$lm_Score_r <- NA InteractionScores_RF$Z_lm_r <- NA InteractionScores_RF$R_Squared_r <- NA InteractionScores_RF$Sum_Z_Score_r <- NA InteractionScores_RF$Avg_Zscore_r <- NA InteractionScores_RF$Raw_Shift_AUC <- NA InteractionScores_RF$Z_Shift_AUC <- NA InteractionScores_RF$lm_Score_AUC <- NA InteractionScores_RF$Z_lm_AUC <- NA InteractionScores_RF$R_Squared_AUC <- NA InteractionScores_RF$Sum_Z_Score_AUC <- NA InteractionScores_RF$Avg_Zscore_AUC <- NA InteractionScores_RF$NG <- NA InteractionScores_RF$SM <- NA for(i in 1:num_genes_RF){ #get each deletion strain ORF Gene_Sel <- unique(X2_RF$OrfRep)[i] #extract only the current deletion strain and its data X_Gene_Sel <- X2_RF[X2_RF$OrfRep == Gene_Sel,] X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise, N = (length(l)), mean_L = mean(l,na.rm = TRUE), median_L = median(l,na.rm = TRUE), sd_L = sd(l,na.rm = TRUE), se_L = sd_L / sqrt(N-1), mean_K = mean(K,na.rm = TRUE), median_K = median(K,na.rm = TRUE), sd_K = sd(K,na.rm = TRUE), se_K = sd_K / sqrt(N-1), mean_r = mean(r,na.rm = TRUE), median_r = median(r,na.rm = TRUE), sd_r = sd(r,na.rm = TRUE), se_r = sd_r / sqrt(N-1), mean_AUC = mean(AUC,na.rm = TRUE), median_AUC = median(AUC,na.rm = TRUE), sd_AUC = sd(AUC,na.rm = TRUE), se_AUC = sd_AUC / sqrt(N-1), NG = sum(NG,na.rm=TRUE), DB= sum(DB,na.rm=TRUE), SM = sum(SM,na.rm=TRUE) ) #Get shift vals #if L is 0, that means the no growth on no drug #if L is NA at 0, that means the spot was removed due to contamination #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift #otherwise calculate shift at no drug conc if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){ X_stats_interaction$Raw_Shift_L <- 0 X_stats_interaction$Raw_Shift_K <- 0 X_stats_interaction$Raw_Shift_r <- 0 X_stats_interaction$Raw_Shift_AUC <- 0 X_stats_interaction$Z_Shift_L <- 0 X_stats_interaction$Z_Shift_K <- 0 X_stats_interaction$Z_Shift_r <- 0 X_stats_interaction$Z_Shift_AUC <- 0 }else{ X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1] X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1] X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1] X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1] } #get WT vals X_stats_interaction$WT_l <- X_stats_BY_L$mean X_stats_interaction$WT_K <- X_stats_BY_K$mean X_stats_interaction$WT_r <- X_stats_BY_r$mean X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean #Get WT SD X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd #only get scores if there's growth at no drug if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){ #calculate expected values X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC #calculate normalized delta values X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC #disregard shift for no growth values in Z score calculation if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){ X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC } #disregard shift for set to max values in Z score calculation if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l #only calculate the L value without shift since L is the only adjusted value #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC } #calculate Z score at each concentration X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) #get linear model gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction) gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction) gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction) gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction) #get interaction score calculated by linear model and R-squared value for the fit gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] r_squared_l <- summary(gene_lm_L)$r.squared gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] r_squared_K <- summary(gene_lm_K)$r.squared gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] r_squared_r <- summary(gene_lm_r)$r.squared gene_interaction_AUC <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_AUC$coefficients[1] r_squared_AUC <- summary(gene_lm_AUC)$r.squared #Get total of non removed values Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1 #report the scores InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE) InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc) InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE) InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc) InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE) InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1) InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE) InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1) } if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){ #calculate expected values X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC #calculate normalized delta values X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC #disregard shift for missing values in Z score calculatiom if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){ X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC } #disregard shift for set to max values in Z score calculation if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l #only calculate the L value without shift since L is the only adjusted value #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC } #calculate Z score at each concentration X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) #NA values for the next part since there's an NA or 0 at the no drug. gene_lm_L <- NA gene_lm_K <- NA gene_lm_r <- NA gene_lm_AUC <- NA gene_interaction_L <- NA r_squared_l <- NA gene_interaction_K <- NA r_squared_K <- NA gene_interaction_r <- NA r_squared_r <- NA gene_interaction_AUC <- NA r_squared_AUC <- NA X_stats_interaction$Raw_Shift_L <- NA X_stats_interaction$Raw_Shift_K <- NA X_stats_interaction$Raw_Shift_r <- NA X_stats_interaction$Raw_Shift_AUC <- NA X_stats_interaction$Z_Shift_L <- NA X_stats_interaction$Z_Shift_K <- NA X_stats_interaction$Z_Shift_r <- NA X_stats_interaction$Z_Shift_AUC <- NA InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA } if(i == 1){ X_stats_interaction_ALL_RF <- X_stats_interaction } if(i > 1){ X_stats_interaction_ALL_RF <- rbind(X_stats_interaction_ALL_RF,X_stats_interaction) } InteractionScores_RF$NG[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE) InteractionScores_RF$DB[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE) InteractionScores_RF$SM[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE) #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int) } print("Pass RF Calculation loop") lm_sd_L <- sd(InteractionScores_RF$lm_Score_L,na.rm=TRUE) lm_sd_K <- sd(InteractionScores_RF$lm_Score_K,na.rm=TRUE) lm_sd_r <- sd(InteractionScores_RF$lm_Score_r,na.rm=TRUE) lm_sd_AUC <- sd(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) lm_mean_L <- mean(InteractionScores_RF$lm_Score_L,na.rm=TRUE) lm_mean_K <- mean(InteractionScores_RF$lm_Score_K,na.rm=TRUE) lm_mean_r <- mean(InteractionScores_RF$lm_Score_r,na.rm=TRUE) lm_mean_AUC <- mean(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) print(paste("Mean RF linear regression score L",lm_mean_L)) InteractionScores_RF$Z_lm_L <- (InteractionScores_RF$lm_Score_L - lm_mean_L)/(lm_sd_L) InteractionScores_RF$Z_lm_K <- (InteractionScores_RF$lm_Score_K - lm_mean_K)/(lm_sd_K) InteractionScores_RF$Z_lm_r <- (InteractionScores_RF$lm_Score_r - lm_mean_r)/(lm_sd_r) InteractionScores_RF$Z_lm_AUC <- (InteractionScores_RF$lm_Score_AUC - lm_mean_AUC)/(lm_sd_AUC) InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$Z_lm_L,decreasing=TRUE),] InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$NG,decreasing=TRUE),] write.csv(InteractionScores_RF,paste(outputpath,"RF_ZScores_Interaction.csv",sep=""),row.names=FALSE) for(i in 1:num_genes_RF){ Gene_Sel <- unique(InteractionScores_RF$OrfRep)[i] X_ZCalculations <- X_stats_interaction_ALL_RF[X_stats_interaction_ALL_RF$OrfRep == Gene_Sel,] X_Int_Scores <- InteractionScores_RF[InteractionScores_RF$OrfRep == Gene_Sel,] p_rf_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + coord_cartesian(ylim = c(-65,65)) + geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) + #annotate("text",x=1,y=35,label = paste("Avg Zscore =",round(X_Int_Scores$Avg_Zscore_L,2))) + annotate("text",x=1,y=25,label = paste("lm Zscore =",round(X_Int_Scores$Z_lm_L,2))) + annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + theme_Publication() p_rf_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + coord_cartesian(ylim = c(-65,65)) + geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) + #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) + annotate("text",x=1,y=25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_L,2))) + annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + theme_Publication() p_rf_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + coord_cartesian(ylim = c(-0.65,0.65)) + geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) + #annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) + annotate("text",x=1,y=0.25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_r,2))) + annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) + annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) + annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) + theme_Publication() p_rf_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + coord_cartesian(ylim = c(-6500,6500)) + geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) + #annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) + annotate("text",x=1,y=2500,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_AUC,2))) + annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) + annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) + annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) + theme_Publication() if(i == 1){ X_stats_interaction_ALL_RF_final <- X_ZCalculations } if(i > 1){ X_stats_interaction_ALL_RF_final <- rbind(X_stats_interaction_ALL_RF_final,X_ZCalculations) } } print("Pass RF ggplot loop") write.csv(X_stats_interaction_ALL_RF_final,paste(outputpath,"RF_ZScore_Calculations.csv",sep=""),row.names = FALSE) ####### Part 5 - Get Zscores for Gene deletion strains #get total number of genes for the next loop num_genes <- length(unique(X2$OrfRep)) print(num_genes) #create the output data.frame containing columns for each deletion strain InteractionScores <- unique(X2["OrfRep"]) #InteractionScores$Gene <- unique(X2$Gene) InteractionScores$Gene <- NA InteractionScores$Raw_Shift_L <- NA InteractionScores$Z_Shift_L <- NA InteractionScores$lm_Score_L <- NA InteractionScores$Z_lm_L <- NA InteractionScores$R_Squared_L <- NA InteractionScores$Sum_Z_Score_L <- NA InteractionScores$Avg_Zscore_L <- NA InteractionScores$Raw_Shift_K <- NA InteractionScores$Z_Shift_K <- NA InteractionScores$lm_Score_K <- NA InteractionScores$Z_lm_K <- NA InteractionScores$R_Squared_K <- NA InteractionScores$Sum_Z_Score_K <- NA InteractionScores$Avg_Zscore_K <- NA InteractionScores$Raw_Shift_r <- NA InteractionScores$Z_Shift_r <- NA InteractionScores$lm_Score_r <- NA InteractionScores$Z_lm_r <- NA InteractionScores$R_Squared_r <- NA InteractionScores$Sum_Z_Score_r <- NA InteractionScores$Avg_Zscore_r <- NA InteractionScores$Raw_Shift_AUC <- NA InteractionScores$Z_Shift_AUC <- NA InteractionScores$lm_Score_AUC <- NA InteractionScores$Z_lm_AUC <- NA InteractionScores$R_Squared_AUC <- NA InteractionScores$Sum_Z_Score_AUC <- NA InteractionScores$Avg_Zscore_AUC <- NA InteractionScores$NG <- NA InteractionScores$DB <- NA InteractionScores$SM <- NA for(i in 1:num_genes){ #get each deletion strain ORF Gene_Sel <- unique(X2$OrfRep)[i] #extract only the current deletion strain and its data X_Gene_Sel <- X2[X2$OrfRep == Gene_Sel,] X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise, N = (length(l)), mean_L = mean(l,na.rm = TRUE), median_L = median(l,na.rm = TRUE), sd_L = sd(l,na.rm = TRUE), se_L = sd_L / sqrt(N-1), mean_K = mean(K,na.rm = TRUE), median_K = median(K,na.rm = TRUE), sd_K = sd(K,na.rm = TRUE), se_K = sd_K / sqrt(N-1), mean_r = mean(r,na.rm = TRUE), median_r = median(r,na.rm = TRUE), sd_r = sd(r,na.rm = TRUE), se_r = sd_r / sqrt(N-1), mean_AUC = mean(AUC,na.rm = TRUE), median_AUC = median(AUC,na.rm = TRUE), sd_AUC = sd(AUC,na.rm = TRUE), se_AUC = sd_AUC / sqrt(N-1), NG = sum(NG,na.rm=TRUE), DB= sum(DB,na.rm=TRUE), SM = sum(SM,na.rm=TRUE) ) #Get shift vals #if L is 0, that means the no growth on no drug #if L is NA at 0, that means the spot was removed due to contamination #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift #otherwise calculate shift at no drug conc if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){ X_stats_interaction$Raw_Shift_L <- 0 X_stats_interaction$Raw_Shift_K <- 0 X_stats_interaction$Raw_Shift_r <- 0 X_stats_interaction$Raw_Shift_AUC <- 0 X_stats_interaction$Z_Shift_L <- 0 X_stats_interaction$Z_Shift_K <- 0 X_stats_interaction$Z_Shift_r <- 0 X_stats_interaction$Z_Shift_AUC <- 0 }else{ X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1] X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1] X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1] X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1] } #get WT vals X_stats_interaction$WT_l <- X_stats_BY_L$mean X_stats_interaction$WT_K <- X_stats_BY_K$mean X_stats_interaction$WT_r <- X_stats_BY_r$mean X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean #Get WT SD X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd #only get scores if there's growth at no drug if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){ #calculate expected values X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC #calculate normalized delta values X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC #disregard shift for no growth values in Z score calculation if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){ X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC } #disregard shift for set to max values in Z score calculation if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l #only calculate the L value without shift since L is the only adjusted value #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC } #calculate Z score at each concentration X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) #get linear model gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction) gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction) gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction) gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction) #get interaction score calculated by linear model and R-squared value for the fit gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] r_squared_l <- summary(gene_lm_L)$r.squared gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] r_squared_K <- summary(gene_lm_K)$r.squared gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] r_squared_r <- summary(gene_lm_r)$r.squared gene_interaction_AUC <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_AUC$coefficients[1] r_squared_AUC <- summary(gene_lm_AUC)$r.squared #Get total of non removed values Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1 #report the scores InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1]) InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1]) InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_L InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_L-lm_mean_L)/lm_sd_L InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE) InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc) InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_K InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_K-lm_mean_K)/lm_sd_K InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE) InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc) InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_r InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_r-lm_mean_r)/lm_sd_r InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE) InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1) InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_AUC InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_AUC-lm_mean_AUC)/lm_sd_AUC InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE) InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1) } if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){ #calculate expected values X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC #calculate normalized delta values X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC #disregard shift for missing values in Z score calculatiom if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){ X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC } #disregard shift for set to max values in Z score calculation if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l #only calculate the L value without shift since L is the only adjusted value #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC } #calculate Z score at each concentration X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) #NA values for the next part since there's an NA or 0 at the no drug. gene_lm_L <- NA gene_lm_K <- NA gene_lm_r <- NA gene_interaction_L <- NA r_squared_l <- NA gene_interaction_K <- NA r_squared_K <- NA gene_interaction_r <- NA r_squared_r <- NA X_stats_interaction$Raw_Shift_L <- NA X_stats_interaction$Raw_Shift_K <- NA X_stats_interaction$Raw_Shift_r <- NA X_stats_interaction$Raw_Shift_AUC <- NA X_stats_interaction$Z_Shift_L <- NA X_stats_interaction$Z_Shift_K <- NA X_stats_interaction$Z_Shift_r <- NA X_stats_interaction$Z_Shift_AUC <- NA InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1]) InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1]) InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <-NA InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA } if(i == 1){ X_stats_interaction_ALL <- X_stats_interaction } if(i > 1){ X_stats_interaction_ALL <- rbind(X_stats_interaction_ALL,X_stats_interaction) } InteractionScores$NG[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE) InteractionScores$DB[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE) InteractionScores$SM[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE) #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int) } print("Pass Int Calculation loop") InteractionScores <- InteractionScores[order(InteractionScores$Z_lm_L,decreasing=TRUE),] InteractionScores <- InteractionScores[order(InteractionScores$NG,decreasing=TRUE),] df_order_by_OrfRep <- unique(InteractionScores$OrfRep) #X_stats_interaction_ALL <- X_stats_interaction_ALL[order(X_stats_interaction_ALL$NG,decreasing=TRUE),] write.csv(InteractionScores,paste(outputpath,"ZScores_Interaction.csv",sep=""),row.names=FALSE) InteractionScores_deletion_enhancers_L <- InteractionScores[InteractionScores$Avg_Zscore_L >= 2,] InteractionScores_deletion_enhancers_K <- InteractionScores[InteractionScores$Avg_Zscore_K <= -2,] InteractionScores_deletion_suppressors_L <- InteractionScores[InteractionScores$Avg_Zscore_L <= -2,] InteractionScores_deletion_suppressors_K <- InteractionScores[InteractionScores$Avg_Zscore_K >= 2,] InteractionScores_deletion_enhancers_and_Suppressors_L <- InteractionScores[InteractionScores$Avg_Zscore_L >= 2 | InteractionScores$Avg_Zscore_L <= -2,] InteractionScores_deletion_enhancers_and_Suppressors_K <- InteractionScores[InteractionScores$Avg_Zscore_K >= 2 | InteractionScores$Avg_Zscore_K <= -2,] InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- InteractionScores[InteractionScores$Z_lm_L >= 2 & InteractionScores$Avg_Zscore_L <= -2,] InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- InteractionScores[InteractionScores$Z_lm_L <= -2 & InteractionScores$Avg_Zscore_L >= 2,] InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- InteractionScores[InteractionScores$Z_lm_K <= -2 & InteractionScores$Avg_Zscore_K >= 2,] InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- InteractionScores[InteractionScores$Z_lm_K >= 2 & InteractionScores$Avg_Zscore_K <= -2,] InteractionScores_deletion_enhancers_L <- InteractionScores_deletion_enhancers_L[!is.na(InteractionScores_deletion_enhancers_L$OrfRep),] InteractionScores_deletion_enhancers_K <- InteractionScores_deletion_enhancers_K[!is.na(InteractionScores_deletion_enhancers_K$OrfRep),] InteractionScores_deletion_suppressors_L <- InteractionScores_deletion_suppressors_L[!is.na(InteractionScores_deletion_suppressors_L$OrfRep),] InteractionScores_deletion_suppressors_K <- InteractionScores_deletion_suppressors_K[!is.na(InteractionScores_deletion_suppressors_K$OrfRep),] InteractionScores_deletion_enhancers_and_Suppressors_L <- InteractionScores_deletion_enhancers_and_Suppressors_L[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L$OrfRep),] InteractionScores_deletion_enhancers_and_Suppressors_K <- InteractionScores_deletion_enhancers_and_Suppressors_K[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K$OrfRep),] InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L[!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L$OrfRep),] InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L[!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L$OrfRep),] InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K[!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K$OrfRep),] InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K[!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K$OrfRep),] write.csv(InteractionScores_deletion_enhancers_L,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_L.csv",sep=""),row.names=FALSE) write.csv(InteractionScores_deletion_enhancers_K,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_K.csv",sep=""),row.names=FALSE) write.csv(InteractionScores_deletion_suppressors_L,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_L.csv",sep=""),row.names=FALSE) write.csv(InteractionScores_deletion_suppressors_K,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_K.csv",sep=""),row.names=FALSE) write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv",sep=""),row.names=FALSE) write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv",sep=""),row.names=FALSE) write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L,paste(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv",sep=""),row.names=FALSE) write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L,paste(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv",sep=""),row.names=FALSE) write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K,paste(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv",sep=""),row.names=FALSE) write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K,paste(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv",sep=""),row.names=FALSE) #get enhancers and suppressors for linear regression InteractionScores_deletion_enhancers_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2,] InteractionScores_deletion_enhancers_K_lm <- InteractionScores[InteractionScores$Z_lm_K <= -2,] InteractionScores_deletion_suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L <= -2,] InteractionScores_deletion_suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2,] InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2 | InteractionScores$Z_lm_L <= -2,] InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2 | InteractionScores$Z_lm_K <= -2,] InteractionScores_deletion_enhancers_L_lm <- InteractionScores_deletion_enhancers_L_lm[!is.na(InteractionScores_deletion_enhancers_L_lm$OrfRep),] InteractionScores_deletion_enhancers_K_lm <- InteractionScores_deletion_enhancers_K_lm[!is.na(InteractionScores_deletion_enhancers_K_lm$OrfRep),] InteractionScores_deletion_suppressors_L_lm <- InteractionScores_deletion_suppressors_L_lm[!is.na(InteractionScores_deletion_suppressors_L_lm$OrfRep),] InteractionScores_deletion_suppressors_K_lm <- InteractionScores_deletion_suppressors_K_lm[!is.na(InteractionScores_deletion_suppressors_K_lm$OrfRep),] InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores_deletion_enhancers_and_Suppressors_L_lm[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L_lm$OrfRep),] InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores_deletion_enhancers_and_Suppressors_K_lm[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K_lm$OrfRep),] write.csv(InteractionScores_deletion_enhancers_L_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_L_lm.csv",sep=""),row.names=FALSE) write.csv(InteractionScores_deletion_enhancers_K_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_K_lm.csv",sep=""),row.names=FALSE) write.csv(InteractionScores_deletion_suppressors_L_lm,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_L_lm.csv",sep=""),row.names=FALSE) write.csv(InteractionScores_deletion_suppressors_K_lm,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_K_lm.csv",sep=""),row.names=FALSE) write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv",sep=""),row.names=FALSE) write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv",sep=""),row.names=FALSE) write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) print('ln 1570 write StudyInfo.csv after ') #write.table(Labels,file=paste("../Code/StudyInfo.txt"),sep="\t",row.names = FALSE) for(i in 1:num_genes){ Gene_Sel <- unique(InteractionScores$OrfRep)[i] X_ZCalculations <- X_stats_interaction_ALL[X_stats_interaction_ALL$OrfRep == Gene_Sel,] X_Int_Scores <- InteractionScores[InteractionScores$OrfRep == Gene_Sel,] p_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + coord_cartesian(ylim = c(-65,65)) + geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) + #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_L,2))) + annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_L,2))) + annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + theme_Publication() p_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + coord_cartesian(ylim = c(-65,65)) + geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) + #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) + annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_K,2))) + annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + theme_Publication() p_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + coord_cartesian(ylim = c(-0.65,0.65)) + geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) + #annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) + annotate("text",x=1,y=0.25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_r,2))) + annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) + annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) + annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) + theme_Publication() p_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + coord_cartesian(ylim = c(-6500,6500)) + geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) + #annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) + annotate("text",x=1,y=2500,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_AUC,2))) + annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) + annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) + annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) + theme_Publication() if(i == 1){ X_stats_interaction_ALL_final <- X_ZCalculations } if(i > 1){ X_stats_interaction_ALL_final <- rbind(X_stats_interaction_ALL_final,X_ZCalculations) } } print("Pass Int ggplot loop") write.csv(X_stats_interaction_ALL_final,paste(outputpath,"ZScore_Calculations.csv",sep=""),row.names = FALSE) Blank <- ggplot(X2_RF) + geom_blank() pdf(paste(outputpath,"InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE) X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise, mean_L = mean(l,na.rm=TRUE), median_L = median(l,na.rm=TRUE), max_L = max(l,na.rm=TRUE), min_L = min(l,na.rm=TRUE), sd_L = sd(l,na.rm=TRUE), mean_K = mean(K,na.rm=TRUE), median_K = median(K,na.rm=TRUE), max_K = max(K,na.rm=TRUE), min_K = min(K,na.rm=TRUE), sd_K = sd(K,na.rm=TRUE), mean_r = mean(r,na.rm=TRUE), median_r = median(r,na.rm=TRUE), max_r = max(r,na.rm=TRUE), min_r = min(r,na.rm=TRUE), sd_r = sd(r,na.rm=TRUE), mean_AUC = mean(AUC,na.rm=TRUE), median_AUC = median(AUC,na.rm=TRUE), max_AUC = max(AUC,na.rm=TRUE), min_AUC = min(AUC,na.rm=TRUE), sd_AUC = sd(AUC,na.rm=TRUE), NG = sum(NG,na.rm=TRUE), DB = sum(DB,na.rm=TRUE), SM = sum(SM,na.rm=TRUE) ) L_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,l)) + geom_point(position="jitter",size=1) + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + annotate("text",x=-0.25,y=10,label="NG") + annotate("text",x=-0.25,y=5,label="DB") + annotate("text",x=-0.25,y=0,label="SM") + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10,label=X_stats_X2_RF$NG) + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=5,label=X_stats_X2_RF$DB) + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=0,label=X_stats_X2_RF$SM) + theme_Publication() K_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,K)) + geom_point(position="jitter",size=1) + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(-20,160)) + annotate("text",x=-0.25,y=-5,label="NG") + annotate("text",x=-0.25,y=-12.5,label="DB") + annotate("text",x=-0.25,y=-20,label="SM") + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-5,label=X_stats_X2_RF$NG) + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-12.5,label=X_stats_X2_RF$DB) + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-20,label=X_stats_X2_RF$SM) + theme_Publication() R_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,r)) + geom_point(position="jitter",size=1) + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + annotate("text",x=-0.25,y=.9,label="NG") + annotate("text",x=-0.25,y=.8,label="DB") + annotate("text",x=-0.25,y=.7,label="SM") + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.9,label=X_stats_X2_RF$NG) + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.8,label=X_stats_X2_RF$DB) + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.7,label=X_stats_X2_RF$SM) + theme_Publication() AUC_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,AUC)) + geom_point(position="jitter",size=1) + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + annotate("text",x=-0.25,y=11000,label="NG") + annotate("text",x=-0.25,y=10000,label="DB") + annotate("text",x=-0.25,y=9000,label="SM") + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=11000,label=X_stats_X2_RF$NG) + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10000,label=X_stats_X2_RF$DB) + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=9000,label=X_stats_X2_RF$SM) + theme_Publication() L_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)) + geom_boxplot() + scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + theme_Publication() K_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)) + geom_boxplot() + scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + theme_Publication() r_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)) + geom_boxplot() + scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + theme_Publication() AUC_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)) + geom_boxplot() + scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + theme_Publication() grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2) grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2) #plot the references #grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4) #grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2) #loop for grid arrange 4x3 j <- rep(1,((num_genes)/3)-1) for(n in 1:length(j)){ j[n+1] <- n*3 + 1 } #loop for printing each plot num <- 0 for(m in 1:(round((num_genes)/3)-1)){ num <- j[m] grid.arrange(p_l[[num]],p_K[[num]],p_r[[num]],p_AUC[[num]],p_l[[num+1]],p_K[[num+1]],p_r[[num+1]],p_AUC[[num+1]],p_l[[num+2]],p_K[[num+2]],p_r[[num+2]],p_AUC[[num+2]],ncol=4,nrow=3) #grid.arrange(p_l[[364]],p_K[[364]],p_r[[364]],p_l[[365]],p_K[[365]],p_r[[365]],p_l[[366]],p_K[[366]],p_r[[366]],ncol=3,nrow=3) #p1[[num+3]],p_K[[num+3]],p_r[[num+3]],p1[[num+4]],p_K[[num+4]],p_r[[num+4]] } if(num_genes != (num+2)){ total_num = num_genes - (num+2) if(total_num == 5){ grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3) grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_AUC[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],p_AUC[[num+7]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) } if(total_num == 4){ grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3) grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_AUC[[num+6]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) } if(total_num == 3){ grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3) #grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) } if(total_num == 2){ grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) #grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) } if(total_num == 1){ grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) #grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) } } dev.off() pdf(paste(outputpath,"RF_InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE) X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise, mean_L = mean(l,na.rm=TRUE), median_L = median(l,na.rm=TRUE), max_L = max(l,na.rm=TRUE), min_L = min(l,na.rm=TRUE), sd_L = sd(l,na.rm=TRUE), mean_K = mean(K,na.rm=TRUE), median_K = median(K,na.rm=TRUE), max_K = max(K,na.rm=TRUE), min_K = min(K,na.rm=TRUE), sd_K = sd(K,na.rm=TRUE), mean_r = mean(r,na.rm=TRUE), median_r = median(r,na.rm=TRUE), max_r = max(r,na.rm=TRUE), min_r = min(r,na.rm=TRUE), sd_r = sd(r,na.rm=TRUE), mean_AUC = mean(AUC,na.rm=TRUE), median_AUC = median(AUC,na.rm=TRUE), max_AUC = max(AUC,na.rm=TRUE), min_AUC = min(AUC,na.rm=TRUE), sd_AUC = sd(AUC,na.rm=TRUE), NG = sum(NG,na.rm=TRUE), DB = sum(DB,na.rm=TRUE), SM = sum(SM,na.rm=TRUE) ) L_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,l)) + geom_point(position="jitter",size=1) + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + annotate("text",x=-0.25,y=10,label="NG") + annotate("text",x=-0.25,y=5,label="DB") + annotate("text",x=-0.25,y=0,label="SM") + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10,label=X_stats_X2_RF$NG) + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=5,label=X_stats_X2_RF$DB) + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=0,label=X_stats_X2_RF$SM) + theme_Publication() K_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,K)) + geom_point(position="jitter",size=1) + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(-20,160)) + annotate("text",x=-0.25,y=-5,label="NG") + annotate("text",x=-0.25,y=-12.5,label="DB") + annotate("text",x=-0.25,y=-20,label="SM") + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-5,label=X_stats_X2_RF$NG) + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-12.5,label=X_stats_X2_RF$DB) + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-20,label=X_stats_X2_RF$SM) + theme_Publication() R_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,r)) + geom_point(position="jitter",size=1) + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + annotate("text",x=-0.25,y=.9,label="NG") + annotate("text",x=-0.25,y=.8,label="DB") + annotate("text",x=-0.25,y=.7,label="SM") + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.9,label=X_stats_X2_RF$NG) + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.8,label=X_stats_X2_RF$DB) + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.7,label=X_stats_X2_RF$SM) + theme_Publication() AUC_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,AUC)) + geom_point(position="jitter",size=1) + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + annotate("text",x=-0.25,y=11000,label="NG") + annotate("text",x=-0.25,y=10000,label="DB") + annotate("text",x=-0.25,y=9000,label="SM") + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=11000,label=X_stats_X2_RF$NG) + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10000,label=X_stats_X2_RF$DB) + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=9000,label=X_stats_X2_RF$SM) + theme_Publication() L_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)) + geom_boxplot() + scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + theme_Publication() K_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)) + geom_boxplot() + scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + theme_Publication() r_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)) + geom_boxplot() + scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + theme_Publication() AUC_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)) + geom_boxplot() + scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(12000,0)) + theme_Publication() grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2) grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2) #plot the references #grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4) #grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2) #loop for grid arrange 4x3 j <- rep(1,((num_genes_RF)/3)-1) for(n in 1:length(j)){ j[n+1] <- n*3 + 1 } #loop for printing each plot num <- 0 for(m in 1:(round((num_genes_RF)/3)-1)){ num <- j[m] grid.arrange(p_rf_l[[num]],p_rf_K[[num]],p_rf_r[[num]],p_rf_AUC[[num]],p_rf_l[[num+1]],p_rf_K[[num+1]],p_rf_r[[num+1]],p_rf_AUC[[num+1]],p_rf_l[[num+2]],p_rf_K[[num+2]],p_rf_r[[num+2]],p_rf_AUC[[num+2]],ncol=4,nrow=3) #grid.arrange(p_rf_l[[364]],p_rf_K[[364]],p_rf_r[[364]],p_rf_l[[365]],p_rf_K[[365]],p_rf_r[[365]],p_rf_l[[366]],p_rf_K[[366]],p_rf_r[[366]],ncol=3,nrow=3) #p1[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p1[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]] } if(num_genes_RF != (num+2)){ total_num = num_genes_RF - (num+2) if(total_num == 5){ grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3) grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_AUC[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],p_rf_AUC[[num+7]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) } if(total_num == 4){ grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3) grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_AUC[[num+6]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) } if(total_num == 3){ grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3) #grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) } if(total_num == 2){ grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) #grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) } if(total_num == 1){ grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) #grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) } } dev.off() #print rank plots for L and K gene interactions InteractionScores_AdjustMissing <- InteractionScores InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_L),]$Avg_Zscore_L <- 0.001 InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_K),]$Avg_Zscore_K <- 0.001 InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_r),]$Avg_Zscore_r <- 0.001 InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_AUC),]$Avg_Zscore_AUC <- 0.001 InteractionScores_AdjustMissing$L_Rank <- NA InteractionScores_AdjustMissing$K_Rank <- NA InteractionScores_AdjustMissing$r_Rank <- NA InteractionScores_AdjustMissing$AUC_Rank <- NA InteractionScores_AdjustMissing$L_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_L) InteractionScores_AdjustMissing$K_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_K) InteractionScores_AdjustMissing$r_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_r) InteractionScores_AdjustMissing$AUC_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_AUC) # InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_L),]$Z_lm_L <- 0.001 InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_K),]$Z_lm_K <- 0.001 InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_r),]$Z_lm_r <- 0.001 InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_AUC),]$Z_lm_AUC <- 0.001 InteractionScores_AdjustMissing$L_Rank_lm <- NA InteractionScores_AdjustMissing$K_Rank_lm <- NA InteractionScores_AdjustMissing$r_Rank_lm <- NA InteractionScores_AdjustMissing$AUC_Rank_lm <- NA InteractionScores_AdjustMissing$L_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_L) InteractionScores_AdjustMissing$K_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_K) InteractionScores_AdjustMissing$r_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_r) InteractionScores_AdjustMissing$AUC_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_AUC) Rank_L_1SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 1,])[1])) + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -1,])[1])) + theme_Publication() Rank_L_2SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 2,])[1])) + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -2,])[1])) + theme_Publication() Rank_L_3SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 3,])[1])) + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -3,])[1])) + theme_Publication() Rank_K_1SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -1,])[1])) + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 1,])[1])) + theme_Publication() Rank_K_2SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -2,])[1])) + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 2,])[1])) + theme_Publication() Rank_K_3SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -3,])[1])) + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 3,])[1])) + theme_Publication() Rank_L_1SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + theme_Publication() Rank_L_2SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + theme_Publication() Rank_L_3SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + theme_Publication() Rank_K_1SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + theme_Publication() Rank_K_2SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + theme_Publication() Rank_K_3SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + theme_Publication() pdf(paste(outputpath,"RankPlots.pdf",sep=""),width = 18, height = 12, onefile = TRUE) grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2) grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2) dev.off() Rank_L_1SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 1,])[1])) + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -1,])[1])) + theme_Publication() Rank_L_2SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 2,])[1])) + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -2,])[1])) + theme_Publication() Rank_L_3SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 3,])[1])) + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -3,])[1])) + theme_Publication() Rank_K_1SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -1,])[1])) + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 1,])[1])) + theme_Publication() Rank_K_2SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -2,])[1])) + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 2,])[1])) + theme_Publication() Rank_K_3SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -3,])[1])) + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 3,])[1])) + theme_Publication() Rank_L_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + theme_Publication() Rank_L_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + theme_Publication() Rank_L_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + theme_Publication() Rank_K_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + theme_Publication() Rank_K_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + theme_Publication() Rank_K_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + theme_Publication() pdf(paste(outputpath,"RankPlots_lm.pdf",sep=""),width = 18, height = 12, onefile = TRUE) grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2) grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2) dev.off() X_NArm <- InteractionScores[!is.na(InteractionScores$Z_lm_L) | !is.na(InteractionScores$Avg_Zscore_L) ,] #find overlaps X_NArm$Overlap <- "No Effect" try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Enhancer Both") try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Suppressor Both") try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= 2,]$Overlap <- "Deletion Enhancer lm only") try(X_NArm[X_NArm$Z_lm_L <= 2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Enhancer Avg Zscore only") try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= -2,]$Overlap <- "Deletion Suppressor lm only") try(X_NArm[X_NArm$Z_lm_L >= -2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Suppressor Avg Zscore only") try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Enhancer lm, Deletion Suppressor Avg Z score") try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Suppressor lm, Deletion Enhancer Avg Z score") #get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 get_lm_L <- lm(X_NArm$Z_lm_L~X_NArm$Avg_Zscore_L) L_lm <- summary(get_lm_L) get_lm_K <- lm(X_NArm$Z_lm_K~X_NArm$Avg_Zscore_K) K_lm <- summary(get_lm_K) get_lm_r <- lm(X_NArm$Z_lm_r~X_NArm$Avg_Zscore_r) r_lm <- summary(get_lm_r) get_lm_AUC <- lm(X_NArm$Z_lm_AUC~X_NArm$Avg_Zscore_AUC) AUC_lm <- summary(get_lm_AUC) pdf(paste(outputpath,"Avg_Zscore_vs_lm_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) print(ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + ggtitle("Avg Zscore vs lm L") + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right()) print(ggplot(X_NArm,aes(Avg_Zscore_K,Z_lm_K)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + ggtitle("Avg Zscore vs lm K") + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + annotate("text",x=0,y=0,label = paste("R-squared = ",round(K_lm$r.squared,2))) + theme_Publication_legend_right()) print(ggplot(X_NArm,aes(Avg_Zscore_r,Z_lm_r)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + ggtitle("Avg Zscore vs lm r") + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + annotate("text",x=0,y=0,label = paste("R-squared = ",round(r_lm$r.squared,2))) + theme_Publication_legend_right()) print(ggplot(X_NArm,aes(Avg_Zscore_AUC,Z_lm_AUC)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + ggtitle("Avg Zscore vs lm AUC") + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + annotate("text",x=0,y=0,label = paste("R-squared = ",round(AUC_lm$r.squared,2))) + theme_Publication_legend_right()) dev.off() lm_v_Zscore_L <- ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L,ORF=OrfRep,Gene=Gene,NG=NG,SM=SM,DB=DB)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + ggtitle("Avg Zscore vs lm L") + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right() pgg <- ggplotly(lm_v_Zscore_L) #pgg plotly_path <- paste(getwd(),"/",outputpath,"Avg_Zscore_vs_lm_NA_rm.html",sep="") saveWidget(pgg, file=plotly_path, selfcontained =TRUE) X_NArm$L_Rank <- rank(X_NArm$Avg_Zscore_L) X_NArm$K_Rank <- rank(X_NArm$Avg_Zscore_K) X_NArm$r_Rank <- rank(X_NArm$Avg_Zscore_r) X_NArm$AUC_Rank <- rank(X_NArm$Avg_Zscore_AUC) X_NArm$L_Rank_lm <- rank(X_NArm$Z_lm_L) X_NArm$K_Rank_lm <- rank(X_NArm$Z_lm_K) X_NArm$r_Rank_lm <- rank(X_NArm$Z_lm_r) X_NArm$AUC_Rank_lm <- rank(X_NArm$Z_lm_AUC) #get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 get_lm_L2 <- lm(X_NArm$L_Rank_lm~X_NArm$L_Rank) L_lm2 <- summary(get_lm_L2) get_lm_K2 <- lm(X_NArm$K_Rank_lm~X_NArm$K_Rank) K_lm2 <- summary(get_lm_K2) get_lm_r2 <- lm(X_NArm$r_Rank_lm~X_NArm$r_Rank) r_lm2 <- summary(get_lm_r2) get_lm_AUC2 <- lm(X_NArm$AUC_Rank_lm~X_NArm$AUC_Rank) AUC_lm2 <- summary(get_lm_AUC2) num_genes_NArm2 <- (dim(X_NArm)[1])/2 pdf(paste(outputpath,"Avg_Zscore_vs_lm_ranked_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) print(ggplot(X_NArm,aes(L_Rank,L_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + ggtitle("Rank Avg Zscore vs lm L") + annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(L_lm2$r.squared,2))) + theme_Publication_legend_right()) print(ggplot(X_NArm,aes(K_Rank,K_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + ggtitle("Rank Avg Zscore vs lm K") + annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(K_lm2$r.squared,2))) + theme_Publication_legend_right()) print(ggplot(X_NArm,aes(r_Rank,r_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + ggtitle("Rank Avg Zscore vs lm r") + annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(r_lm2$r.squared,2))) + theme_Publication_legend_right()) print(ggplot(X_NArm,aes(AUC_Rank,AUC_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + ggtitle("Rank of Avg Zscore vs lm AUC") + annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(AUC_lm2$r.squared,2))) + theme_Publication_legend_right()) dev.off() Rank_L_1SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 1,])[1])) + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -1,])[1])) + theme_Publication() Rank_L_2SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 2,])[1])) + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -2,])[1])) + theme_Publication() Rank_L_3SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 3,])[1])) + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -3,])[1])) + theme_Publication() Rank_K_1SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -1,])[1])) + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 1,])[1])) + theme_Publication() Rank_K_2SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -2,])[1])) + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 2,])[1])) + theme_Publication() Rank_K_3SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -3,])[1])) + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 3,])[1])) + theme_Publication() Rank_L_1SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + theme_Publication() Rank_L_2SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + theme_Publication() Rank_L_3SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + theme_Publication() Rank_K_1SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + theme_Publication() Rank_K_2SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + theme_Publication() Rank_K_3SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + theme_Publication() pdf(paste(outputpath,"RankPlots_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2) grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2) dev.off() Rank_L_1SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 1,])[1])) + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -1,])[1])) + theme_Publication() Rank_L_2SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 2,])[1])) + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -2,])[1])) + theme_Publication() Rank_L_3SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 3,])[1])) + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -3,])[1])) + theme_Publication() Rank_K_1SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -1,])[1])) + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 1,])[1])) + theme_Publication() Rank_K_2SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -2,])[1])) + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 2,])[1])) + theme_Publication() Rank_K_3SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -3,])[1])) + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 3,])[1])) + theme_Publication() Rank_L_1SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + theme_Publication() Rank_L_2SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + theme_Publication() Rank_L_3SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + theme_Publication() Rank_K_1SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + theme_Publication() Rank_K_2SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + theme_Publication() Rank_K_3SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + theme_Publication() pdf(paste(outputpath,"RankPlots_lm_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2) grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2) dev.off() } #get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 get_lm_1 <- lm(X_NArm$Z_lm_K~X_NArm$Z_lm_L) L_lm_1 <- summary(get_lm_1) get_lm_2 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_L) L_lm_2 <- summary(get_lm_2) get_lm_3 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_L) L_lm_3 <- summary(get_lm_3) get_lm_4 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_K) L_lm_4 <- summary(get_lm_4) get_lm_5 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_K) L_lm_5 <- summary(get_lm_5) get_lm_6 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_r) L_lm_6 <- summary(get_lm_6) pdf(file=paste(outputpath,"Correlation_CPPs.pdf",sep=""),width = 10, height = 7, onefile = TRUE) ggplot(X_NArm,aes(Z_lm_L,Z_lm_K)) + geom_point(shape=3,color="gray70") + geom_smooth(method="lm",color="tomato3") + ggtitle("Interaction L vs. Interaction K") + xlab("z-score L") + ylab("z-score K") + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_1$r.squared,3))) + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) ggplot(X_NArm,aes(Z_lm_L,Z_lm_r)) + geom_point(shape=3,color="gray70") + geom_smooth(method="lm",color="tomato3") + ggtitle("Interaction L vs. Interaction r") + xlab("z-score L") + ylab("z-score r") + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_2$r.squared,3))) + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) ggplot(X_NArm,aes(Z_lm_L,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + geom_smooth(method="lm",color="tomato3") + ggtitle("Interaction L vs. Interaction AUC") + xlab("z-score L") + ylab("z-score AUC") + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_3$r.squared,3))) + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) ggplot(X_NArm,aes(Z_lm_K,Z_lm_r)) + geom_point(shape=3,color="gray70") + geom_smooth(method="lm",color="tomato3") + ggtitle("Interaction K vs. Interaction r") + xlab("z-score K") + ylab("z-score r") + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_4$r.squared,3))) + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) ggplot(X_NArm,aes(Z_lm_K,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + geom_smooth(method="lm",color="tomato3") + ggtitle("Interaction K vs. Interaction AUC") + xlab("z-score K") + ylab("z-score AUC") + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_5$r.squared,3))) + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) ggplot(X_NArm,aes(Z_lm_r,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + geom_smooth(method="lm",color="tomato3") + ggtitle("Interaction r vs. Interaction AUC") + xlab("z-score r") + ylab("z-score AUC") + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_6$r.squared,3))) + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) InteractionScores_RF2 <- InteractionScores_RF[!is.na(InteractionScores_RF$Z_lm_L),] ggplot(X_NArm,aes(Z_lm_L,Z_lm_K)) + geom_point(shape=3,color="gray70") + geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_K),color="cyan") + ggtitle("Interaction L vs. Interaction K") + xlab("z-score L") + ylab("z-score K") + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_1$r.squared,3))) + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) ggplot(X_NArm,aes(Z_lm_L,Z_lm_r)) + geom_point(shape=3,color="gray70") + geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_r),color="cyan") + ggtitle("Interaction L vs. Interaction r") + xlab("z-score L") + ylab("z-score r") + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_2$r.squared,3))) + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) ggplot(X_NArm,aes(Z_lm_L,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_AUC),color="cyan") + ggtitle("Interaction L vs. Interaction AUC") + xlab("z-score L") + ylab("z-score AUC") + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_3$r.squared,3))) + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) ggplot(X_NArm,aes(Z_lm_K,Z_lm_r)) + geom_point(shape=3,color="gray70") + geom_point(data=InteractionScores_RF2,aes(Z_lm_K,Z_lm_r),color="cyan") + ggtitle("Interaction K vs. Interaction r") + xlab("z-score K") + ylab("z-score r") + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_4$r.squared,3))) + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) ggplot(X_NArm,aes(Z_lm_K,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + geom_point(data=InteractionScores_RF2,aes(Z_lm_K,Z_lm_AUC),color="cyan") + ggtitle("Interaction K vs. Interaction AUC") + xlab("z-score K") + ylab("z-score AUC") + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_5$r.squared,3))) + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) ggplot(X_NArm,aes(Z_lm_r,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + geom_point(data=InteractionScores_RF2,aes(Z_lm_r,Z_lm_AUC),color="cyan") + ggtitle("Interaction r vs. Interaction AUC") + xlab("z-score r") + ylab("z-score AUC") + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_6$r.squared,3))) + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) dev.off() #write.csv(Labels,file=paste("../Code/Parameters.csv"),row.names = FALSE) timestamp() #BoneYard*********************************************** #I'm thinking this parameter needs to be save somewhere "permanent' for the record so outputs can be reproduced. #take this out of the Arguments. In Matlab I could for future in .mat file. Maybe I could save the SD Args[2] as part of the StudyInfo.txt. #Corruptable but better than nothing. #if(is.na(Args[2])){ # std=3 #}else { # std= Arg[2] #Delta_Background_sdFactor <- 2 #Args[3] #delBGFactor <- as.numeric(Delta_Background_sdFactor) #}