2722 lines
165 KiB
R
2722 lines
165 KiB
R
#!/usr/bin/env Rscript
|
|
# Based on InteractionTemplate.R which is based on Sean Santos's Interaction_V5 script
|
|
#
|
|
# Updated 240724 Bryan C Roessler to improve file operations and portability
|
|
# NOTE: The script now has 4 additional OPTIONAL arguments:
|
|
# 1. Path to input file
|
|
# 2. /output/ directory
|
|
# 3. Path to StudyInfo.csv
|
|
# 4. Path to SGDgeneList
|
|
# 5. Standard deviation value
|
|
|
|
# Load libraries
|
|
library("ggplot2")
|
|
library("plyr")
|
|
library("extrafont")
|
|
library("gridExtra")
|
|
library("gplots")
|
|
library("RColorBrewer")
|
|
library("stringr")
|
|
library("gdata")
|
|
library("plotly")
|
|
library("htmlwidgets")
|
|
|
|
# Parse arguments
|
|
args <- commandArgs(TRUE)
|
|
inputFile <- args[1]
|
|
|
|
# Set output dir
|
|
if (length(args) > 2) {
|
|
outDir <- args[2]
|
|
} else {
|
|
outDir <- "/ZScores/" # for legacy workflow
|
|
}
|
|
|
|
# Set StudyInfo file path
|
|
if (length(args) > 3) {
|
|
studyInfo <- args[3]
|
|
} else {
|
|
studyInfo <- "../Code/StudyInfo.csv" # for legacy workflow
|
|
}
|
|
|
|
# Set SGDgeneList file path
|
|
if (length(args) > 4) {
|
|
SGDgeneList <- args[4]
|
|
} else {
|
|
SGDgeneList <- "../Code/SGD_features.tab" # for legacy workflow
|
|
}
|
|
|
|
# Set standard deviation
|
|
if (length(args) > 5) {
|
|
delBGFactor <- args[5]
|
|
} else {
|
|
# User prompt for std multiplier Value
|
|
print("Enter a Standard Deviation value for noise filter")
|
|
print("Sean Santos recommends 3 or 5")
|
|
delBGFactor <- readLines(file("stdin"), n = 1L)
|
|
}
|
|
delBGFactor <- as.numeric(delBGFactor)
|
|
if(is.na(delBGFactor)){
|
|
delBGFactor <- 3 # Recommended by Sean
|
|
}
|
|
print(paste("The Standard Deviation Value is: ", delBGFactor))
|
|
|
|
outDir_QC <- paste(outDir,"QC/",sep="")
|
|
|
|
if !(file.exists(outDir)){
|
|
dir.create(file.path(outDir))
|
|
}
|
|
|
|
if !(file.exists(outDir_QC)){
|
|
dir.create(file.path(outDir_QC))
|
|
}
|
|
|
|
# Capture Exp_ number,use it to Save args[2]{std}to Labels field and then
|
|
# write to Labels to studyInfo.txt for future reference
|
|
Labels <- read.csv(file=studyInfo,stringsAsFactors = FALSE) # sep= ","
|
|
expNumber <- as.numeric(sub("^.*?(\\d+)$", "\\1", getwd()))
|
|
Labels[expNumber,3] <- delBGFactor
|
|
write.csv(Labels,file=studyInfo,row.names = FALSE)
|
|
|
|
###############################################################################
|
|
################### BEGIN USER DATA SELECTION SECTION #########################
|
|
###############################################################################
|
|
|
|
#read in the data
|
|
X <- read.delim(inputFile,skip=2,as.is=T,row.names=1,strip.white=TRUE)
|
|
X <- X[!(X[[1]]%in%c("","Scan")),]
|
|
#X <- X[!(X[[1]]%in%c(61:76)),] #Remove dAmp plates which are Scans 61 thru 76
|
|
|
|
#X <- X[which(X$Specifics == "WT"),]
|
|
|
|
#X_length <- length(X[1,])
|
|
#X_end <- length(X[1,]) - 2
|
|
#X <- X[,c(1:42,X_end:X_length)]
|
|
|
|
|
|
#use numeric data to perform operations
|
|
X$Col <- as.numeric(X$Col)
|
|
X$Row <- as.numeric(X$Row)
|
|
X$l <- as.numeric(X$l)
|
|
X$K <- as.numeric(X$K)
|
|
X$r <- as.numeric(X$r)
|
|
X$Scan <- as.numeric(X$Scan)
|
|
X$AUC <- as.numeric(X$AUC)
|
|
X$LstBackgrd <- as.numeric(X$LstBackgrd)
|
|
X$X1stBackgrd <- as.numeric(X$X1stBackgrd)
|
|
|
|
#Sometimes the an experimenter may have placed the non-varying drug in the 'Drug' col instead of the 'Modifier1' col
|
|
#as was the case in Gemcitabin and Cytarabin experiments.
|
|
#The following allows user to rename columns so as to get the appropriate
|
|
#data where it needs to be for the script to run properly.
|
|
#colnames(X)[7] <- "Modifier1"
|
|
#colnames(X)[8] <- "Conc1"
|
|
#colnames(X)[10] <- "Drug"
|
|
#colnames(X)[11] <- "Conc"
|
|
|
|
#set the OrfRep to YDL227C for the ref data
|
|
X[X$ORF == "YDL227C",]$OrfRep <- "YDL227C"
|
|
#Sean removes the Doxycyclin at 0.0ug.mL so that only the Oligomycin series with Doxycyclin of 0.12ug/mL are used.
|
|
#That is the first DM plates are removed from the data set with the following.
|
|
#X <- X[X$Conc1 != "0ug/ml",] #This removes data with dox ==0 leaving gene expression on with four different concentrations of Gemcytabin
|
|
X <- X[X$Drug != "BMH21",] #This removes data concerning BMH21 for this experiment
|
|
|
|
#Mert placed the"bad_spot" text in the ORF col. for particular spots in the RF1 and RF2 plates.
|
|
#This code removes those spots from the data set used for the interaction analysis. Dr.Hartman feels that these donot effect Zscores significantly and so "non-currated" files were used.
|
|
#try(X <- X[X$ORF != "bad_spot",])
|
|
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++=
|
|
|
|
#get total number of drug concentrations
|
|
Total_Conc_Nums <- length(unique(X$Conc))
|
|
|
|
#function to ID numbers in string with characters+numbers (ie to get numeric drug conc)
|
|
numextract <- function(string){
|
|
str_extract(string, "\\-*\\d+\\.*\\d*")
|
|
}
|
|
|
|
#generate a new column with the numeric drug concs
|
|
X$Conc_Num <- as.numeric(numextract(X$Conc))
|
|
#Generate new column with the numeric drug concs as factors starting at 0 for the graphing later
|
|
X$Conc_Num_Factor <- as.numeric(as.factor(X$Conc_Num)) - 1
|
|
|
|
#Get the max factor for concentration
|
|
MAX_CONC <- max(X$Conc_Num_Factor)
|
|
#if treating numbers not as factors uncomment next line and comment out previous line
|
|
#MAX_CONC <- max(X$Conc_Num)
|
|
|
|
#remove wells with problems for making graphs and to not include in summary statistics
|
|
X <- X[X$Gene != "BLANK",]
|
|
X <- X[X$Gene != "Blank",]
|
|
X <- X[X$ORF != "Blank",]
|
|
X <- X[X$Gene != "blank",]
|
|
#X <- X[X$Gene != "HO",]
|
|
Xbu= X
|
|
#Inserted to use SGDgenelist to update orfs and replace empty geneName cells with ORF name (adapted from Sean's Merge script). This is to 'fix' the naming for everything that follows (REMc, Heatmaps ... et.al) rather than do it piece meal later
|
|
#Sean's Match Script( which was adapted here) was fixed 2022_0608 so as not to write over the RF1&RF2 geneNames which caused a variance with his code results
|
|
#in the Z_lm_L,K,r&AUC output values. Values correlated well but were off by a multiplier factor.
|
|
genes = data.frame(read.delim(file=SGDgeneList,quote="",header=FALSE,colClasses = c(rep("NULL",3), rep("character", 2), rep("NULL", 11))))
|
|
for(i in 1:length(X[,14])){
|
|
ii= as.numeric(i)
|
|
line_num = match(X[ii,14],genes[,1],nomatch=1)
|
|
OrfRepColNum= as.numeric(match('OrfRep',names(X)))
|
|
if(X[ii,OrfRepColNum]!= "YDL227C"){
|
|
X[ii,15] = genes[line_num,2]
|
|
}
|
|
if((X[ii,15] == "")||(X[ii,15] == "OCT1")){
|
|
X[ii,15] = X[ii,OrfRepColNum]
|
|
}
|
|
}
|
|
Xblankreplace= X
|
|
#X= Xbu #for restore testing restore X if geneName 'Match' routine needs changing
|
|
|
|
#Remove dAmPs *******************************
|
|
#jlh confirmed to leave dAmps in so comment out this section
|
|
#DAmPs_List <- "../Code/22_0602_Remy_DAmPsList.txt"
|
|
#Damps <- read.delim(DAmPs_List,header=F)
|
|
|
|
#X <- X[!(X$ORF %in% Damps$V1),] #fix this to Damps[,1]
|
|
#XafterDampsRM=X #Backup for debugging especially when Rstudio goes crazy out of control
|
|
# ***********
|
|
|
|
|
|
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
#++++++END USER DATA SELECTION+++++++++++++++++++++++++++++++++++++++++++++++++
|
|
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
print("ln137 End of User Section including blank gene writeOver")
|
|
#++++Begin Graphics Boiler Plate Section+++++++++++++++++++++++++++++++++++++++
|
|
#theme elements for plots
|
|
theme_Publication <- function(base_size=14, base_family="sans") {
|
|
library(grid)
|
|
library(ggthemes)
|
|
(theme_foundation(base_size=base_size, base_family=base_family)
|
|
+ theme(plot.title = element_text(face = "bold",
|
|
size = rel(1.2), hjust = 0.5),
|
|
text = element_text(),
|
|
panel.background = element_rect(colour = NA),
|
|
plot.background = element_rect(colour = NA),
|
|
panel.border = element_rect(colour = NA),
|
|
axis.title = element_text(face = "bold",size = rel(1)),
|
|
axis.title.y = element_text(angle=90,vjust =2),
|
|
axis.title.x = element_text(vjust = -0.2),
|
|
axis.text = element_text(),
|
|
axis.line = element_line(colour="black"),
|
|
axis.ticks = element_line(),
|
|
panel.grid.major = element_line(colour="#f0f0f0"),
|
|
panel.grid.minor = element_blank(),
|
|
legend.key = element_rect(colour = NA),
|
|
legend.position = "bottom",
|
|
legend.direction = "horizontal",
|
|
legend.key.size= unit(0.2, "cm"),
|
|
legend.spacing = unit(0, "cm"),
|
|
legend.title = element_text(face="italic"),
|
|
plot.margin=unit(c(10,5,5,5),"mm"),
|
|
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
|
|
strip.text = element_text(face="bold")
|
|
))
|
|
|
|
}
|
|
|
|
scale_fill_Publication <- function(...){
|
|
library(scales)
|
|
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
|
|
|
|
}
|
|
|
|
scale_colour_Publication <- function(...){
|
|
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
|
|
|
|
}
|
|
|
|
|
|
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
|
|
(theme_foundation(base_size=base_size, base_family=base_family)
|
|
+ theme(plot.title = element_text(face = "bold",
|
|
size = rel(1.2), hjust = 0.5),
|
|
text = element_text(),
|
|
panel.background = element_rect(colour = NA),
|
|
plot.background = element_rect(colour = NA),
|
|
panel.border = element_rect(colour = NA),
|
|
axis.title = element_text(face = "bold",size = rel(1)),
|
|
axis.title.y = element_text(angle=90,vjust =2),
|
|
axis.title.x = element_text(vjust = -0.2),
|
|
axis.text = element_text(),
|
|
axis.line = element_line(colour="black"),
|
|
axis.ticks = element_line(),
|
|
panel.grid.major = element_line(colour="#f0f0f0"),
|
|
panel.grid.minor = element_blank(),
|
|
legend.key = element_rect(colour = NA),
|
|
legend.position = "right",
|
|
legend.direction = "vertical",
|
|
legend.key.size= unit(0.5, "cm"),
|
|
legend.spacing = unit(0, "cm"),
|
|
legend.title = element_text(face="italic"),
|
|
plot.margin=unit(c(10,5,5,5),"mm"),
|
|
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
|
|
strip.text = element_text(face="bold")
|
|
))
|
|
|
|
}
|
|
|
|
scale_fill_Publication <- function(...){
|
|
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
|
|
|
|
}
|
|
|
|
scale_colour_Publication <- function(...){
|
|
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
|
|
|
|
}
|
|
|
|
|
|
#print timestamp for initial time the code starts
|
|
timestamp()
|
|
#+++++BEGIN QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
###Part 2 - Quality control
|
|
#print quality control graphs for each dataset before removing data due to contamination
|
|
#and before adjusting missing data to max theoretical values
|
|
|
|
#plate analysis plot
|
|
#plate analysis is a quality check to identify plate effects containing anomalies
|
|
|
|
Plate_Analysis_L <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
|
|
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
|
|
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
|
|
ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication()
|
|
|
|
Plate_Analysis_K <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
|
|
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
|
|
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
|
|
ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication()
|
|
|
|
Plate_Analysis_r <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
|
|
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
|
|
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
|
|
ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_Publication()
|
|
|
|
Plate_Analysis_AUC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
|
|
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
|
|
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
|
|
ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication()
|
|
|
|
|
|
|
|
Plate_Analysis_L_Box <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() +
|
|
ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication()
|
|
|
|
Plate_Analysis_K_Box <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() +
|
|
ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication()
|
|
|
|
Plate_Analysis_r_Box <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() +
|
|
ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_Publication()
|
|
|
|
Plate_Analysis_AUC_Box <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() +
|
|
ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication()
|
|
|
|
#quality control - values with a high delta background likely have heavy contamination
|
|
#check the frequency of these values
|
|
#report the L and K values of these spots
|
|
#report the number to be removed based on the Delta_Background_Tolerance
|
|
X$Delta_Backgrd <- X$LstBackgrd - X$X1stBackgrd
|
|
|
|
|
|
#raw l vs K before QC
|
|
Raw_l_vs_K_beforeQC <- ggplot(X,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) +
|
|
ggtitle("Raw L vs K before QC") +
|
|
theme_Publication_legend_right()
|
|
pdf(paste(outDir_QC,"Raw_L_vs_K_beforeQC.pdf",sep=""),width = 12,height = 8)
|
|
Raw_l_vs_K_beforeQC
|
|
dev.off()
|
|
pgg <- ggplotly(Raw_l_vs_K_beforeQC)
|
|
#pgg
|
|
plotly_path <- paste(outDir_QC,"Raw_L_vs_K_beforeQC.html",sep="")
|
|
saveWidget(pgg, file=plotly_path, selfcontained =TRUE)
|
|
|
|
|
|
#set delta background tolerance based on 3 sds from the mean delta background
|
|
Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(delBGFactor*sd(X$Delta_Backgrd))
|
|
#Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(3*sd(X$Delta_Backgrd))
|
|
print(paste("Delta_Background_Tolerance is",Delta_Background_Tolerance,sep=" "))
|
|
|
|
Plate_Analysis_Delta_Backgrd <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2,position="jitter") +
|
|
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
|
|
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
|
|
ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication()
|
|
|
|
Plate_Analysis_Delta_Backgrd_Box <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() +
|
|
ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication()
|
|
|
|
|
|
|
|
X_Delta_Backgrd_above_Tolerance <- X[X$Delta_Backgrd >= Delta_Background_Tolerance,]
|
|
|
|
X_Delta_Backgrd_above_Tolerance_K_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$K,na.rm = TRUE))/2
|
|
X_Delta_Backgrd_above_Tolerance_L_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$l,na.rm = TRUE))/2
|
|
X_Delta_Backgrd_above_Tolerance_toRemove <- dim(X_Delta_Backgrd_above_Tolerance)[1]
|
|
|
|
X_Delta_Backgrd_above_Tolerance_L_vs_K <- ggplot(X_Delta_Backgrd_above_Tolerance,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) +
|
|
ggtitle(paste("Raw L vs K for strains above delta background threshold of",Delta_Background_Tolerance,"or above")) +
|
|
annotate("text",x=X_Delta_Backgrd_above_Tolerance_L_halfmedian,y=X_Delta_Backgrd_above_Tolerance_K_halfmedian,
|
|
label = paste("Strains above delta background tolerance = ",X_Delta_Backgrd_above_Tolerance_toRemove)) +
|
|
theme_Publication_legend_right()
|
|
pdf(paste(outDir_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.pdf",sep=""),width = 12,height = 8)
|
|
X_Delta_Backgrd_above_Tolerance_L_vs_K
|
|
dev.off()
|
|
pgg <- ggplotly(X_Delta_Backgrd_above_Tolerance_L_vs_K)
|
|
#pgg
|
|
plotly_path <- paste(outDir_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.html",sep="")
|
|
saveWidget(pgg, file=plotly_path, selfcontained =TRUE)
|
|
|
|
#frequency plot for all data vs. the delta_background
|
|
DeltaBackground_Frequency_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_density() +
|
|
ggtitle("Density plot for Delta Background by Conc All Data") + theme_Publication_legend_right()
|
|
|
|
#bar plot for all data vs. the delta_background
|
|
DeltaBackground_Bar_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_bar() +
|
|
ggtitle("Bar plot for Delta Background by Conc All Data") + theme_Publication_legend_right()
|
|
|
|
pdf(file = paste(outDir_QC,"Frequency_Delta_Background.pdf",sep=""),width = 12,height = 8)
|
|
print(DeltaBackground_Frequency_Plot)
|
|
print(DeltaBackground_Bar_Plot)
|
|
dev.off()
|
|
|
|
|
|
#Need to identify missing data, and differentiate between this data and removed data so the removed data can get set to NA and the missing data can get set to max theoretical values
|
|
#1 for missing data, 0 for non missing data
|
|
#Use "NG" for NoGrowth rather than "missing"
|
|
X$NG <- 0
|
|
try(X[X$l == 0 & !is.na(X$l),]$NG <- 1)
|
|
|
|
#1 for removed data, 0 non removed data
|
|
#Use DB to identify number of genes removed due to the DeltaBackground Threshold rather than "Removed"
|
|
X$DB <- 0
|
|
try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$DB <- 1)
|
|
|
|
#replace the CPPs for l, r, AUC and K (must be last!) for removed data
|
|
try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$l <- NA)
|
|
try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$r <- NA)
|
|
try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$AUC <- NA)
|
|
try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$K <- NA)
|
|
|
|
|
|
Plate_Analysis_L_afterQC <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
|
|
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
|
|
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
|
|
ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication()
|
|
|
|
Plate_Analysis_K_afterQC <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
|
|
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
|
|
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
|
|
ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication()
|
|
|
|
Plate_Analysis_r_afterQC <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
|
|
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
|
|
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
|
|
ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication()
|
|
|
|
Plate_Analysis_AUC_afterQC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
|
|
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
|
|
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
|
|
ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication()
|
|
|
|
Plate_Analysis_Delta_Backgrd_afterQC <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
|
|
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
|
|
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
|
|
ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication()
|
|
|
|
|
|
Plate_Analysis_L_Box_afterQC <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() +
|
|
ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication()
|
|
|
|
Plate_Analysis_K_Box_afterQC <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() +
|
|
ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication()
|
|
|
|
Plate_Analysis_r_Box_afterQC <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() +
|
|
ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication()
|
|
|
|
Plate_Analysis_AUC_Box_afterQC <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() +
|
|
ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication()
|
|
|
|
Plate_Analysis_Delta_Backgrd_Box_afterQC <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() +
|
|
ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication()
|
|
|
|
#print the plate analysis data before and after QC
|
|
pdf(file=paste(outDir_QC,"Plate_Analysis.pdf",sep=""),width = 14,height=9)
|
|
Plate_Analysis_L
|
|
Plate_Analysis_L_afterQC
|
|
Plate_Analysis_K
|
|
Plate_Analysis_K_afterQC
|
|
Plate_Analysis_r
|
|
Plate_Analysis_r_afterQC
|
|
Plate_Analysis_AUC
|
|
Plate_Analysis_AUC_afterQC
|
|
Plate_Analysis_Delta_Backgrd
|
|
Plate_Analysis_Delta_Backgrd_afterQC
|
|
dev.off()
|
|
|
|
#print the plate analysis data before and after QC
|
|
pdf(file=paste(outDir_QC,"Plate_Analysis_Boxplots.pdf",sep=""),width = 18,height=9)
|
|
Plate_Analysis_L_Box
|
|
Plate_Analysis_L_Box_afterQC
|
|
Plate_Analysis_K_Box
|
|
Plate_Analysis_K_Box_afterQC
|
|
Plate_Analysis_r_Box
|
|
Plate_Analysis_r_Box_afterQC
|
|
Plate_Analysis_AUC_Box
|
|
Plate_Analysis_AUC_Box_afterQC
|
|
Plate_Analysis_Delta_Backgrd_Box
|
|
Plate_Analysis_Delta_Backgrd_Box_afterQC
|
|
dev.off()
|
|
|
|
#remove the zero values and print plate analysis
|
|
X_noZero <- X[which(X$l > 0),]
|
|
Plate_Analysis_L_afterQC_Z <- ggplot(X_noZero,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
|
|
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
|
|
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
|
|
ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication()
|
|
|
|
Plate_Analysis_K_afterQC_Z <- ggplot(X_noZero,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
|
|
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
|
|
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
|
|
ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication()
|
|
|
|
Plate_Analysis_r_afterQC_Z <- ggplot(X_noZero,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
|
|
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
|
|
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
|
|
ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication()
|
|
|
|
Plate_Analysis_AUC_afterQC_Z <- ggplot(X_noZero,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
|
|
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
|
|
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
|
|
ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication()
|
|
|
|
Plate_Analysis_Delta_Backgrd_afterQC_Z <- ggplot(X_noZero,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
|
|
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
|
|
geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
|
|
ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication()
|
|
|
|
|
|
Plate_Analysis_L_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() +
|
|
ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication()
|
|
|
|
Plate_Analysis_K_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() +
|
|
ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication()
|
|
|
|
Plate_Analysis_r_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() +
|
|
ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication()
|
|
|
|
Plate_Analysis_AUC_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() +
|
|
ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication()
|
|
|
|
Plate_Analysis_Delta_Backgrd_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() +
|
|
ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication()
|
|
|
|
#print the plate analysis data before and after QC
|
|
pdf(file=paste(outDir_QC,"Plate_Analysis_noZeros.pdf",sep=""),width = 14,height=9)
|
|
Plate_Analysis_L_afterQC_Z
|
|
Plate_Analysis_K_afterQC_Z
|
|
Plate_Analysis_r_afterQC_Z
|
|
Plate_Analysis_AUC_afterQC_Z
|
|
Plate_Analysis_Delta_Backgrd_afterQC_Z
|
|
dev.off()
|
|
|
|
#print the plate analysis data before and after QC
|
|
pdf(file=paste(outDir_QC,"Plate_Analysis_noZeros_Boxplots.pdf",sep=""),width = 18,height=9)
|
|
Plate_Analysis_L_Box_afterQC_Z
|
|
Plate_Analysis_K_Box_afterQC_Z
|
|
Plate_Analysis_r_Box_afterQC_Z
|
|
Plate_Analysis_AUC_Box_afterQC_Z
|
|
Plate_Analysis_Delta_Backgrd_Box_afterQC_Z
|
|
dev.off()
|
|
|
|
#remove dataset with zeros removed
|
|
rm(X_noZero)
|
|
|
|
|
|
#X_test_missing_and_removed <- X[X$Removed == 1,]
|
|
|
|
#calculate summary statistics for all strains, including both background and the deletions
|
|
X_stats_ALL <- ddply(X, c("Conc_Num","Conc_Num_Factor"), summarise,
|
|
N = (length(l)),
|
|
mean_L = mean(l,na.rm=TRUE),
|
|
median_L = median(l,na.rm=TRUE),
|
|
max_L = max(l,na.rm=TRUE),
|
|
min_L = min(l,na.rm=TRUE),
|
|
sd_L = sd(l,na.rm=TRUE),
|
|
se_L = sd_L / sqrt(N-1),
|
|
mean_K = mean(K,na.rm=TRUE),
|
|
median_K = median(K,na.rm=TRUE),
|
|
max_K = max(K,na.rm=TRUE),
|
|
min_K = min(K,na.rm=TRUE),
|
|
sd_K = sd(K,na.rm=TRUE),
|
|
se_K = sd_K / sqrt(N-1),
|
|
mean_r = mean(r,na.rm=TRUE),
|
|
median_r = median(r,na.rm=TRUE),
|
|
max_r = max(r,na.rm=TRUE),
|
|
min_r = min(r,na.rm=TRUE),
|
|
sd_r = sd(r,na.rm=TRUE),
|
|
se_r = sd_r / sqrt(N-1),
|
|
mean_AUC = mean(AUC,na.rm=TRUE),
|
|
median_AUC = median(AUC,na.rm=TRUE),
|
|
max_AUC = max(AUC,na.rm=TRUE),
|
|
min_AUC = min(AUC,na.rm=TRUE),
|
|
sd_AUC = sd(AUC,na.rm=TRUE),
|
|
se_AUC = sd_AUC / sqrt(N-1)
|
|
)
|
|
#print(X_stats_ALL_L)
|
|
|
|
write.csv(X_stats_ALL,file=paste(outDir,"SummaryStats_ALLSTRAINS.csv"),row.names = FALSE)
|
|
#+++++END QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
|
|
##### Part 3 - Generate summary statistics and calculate the max theoretical L value
|
|
##### Calculate the Z score at each drug conc for each deletion strain
|
|
|
|
|
|
#get the background strains - can be modified to take another argument but for most screens will just be YDL227C
|
|
Background_Strains <- c("YDL227C")
|
|
|
|
#first part of loop will go through for each background strain
|
|
#most cases there will only be one YDL227C
|
|
for(s in Background_Strains){
|
|
X_Background <- X[X$OrfRep == s,]
|
|
|
|
#if there's missing data for the background strains set these values to NA so the 0 values aren't included in summary statistics
|
|
#we may want to consider in some cases giving the max high value to L depending on the data type
|
|
if(table(X_Background$l)[1] == 0){
|
|
X_Background[X_Background$l == 0,]$l <- NA
|
|
X_Background[X_Background$K == 0,]$K <- NA
|
|
X_Background[X_Background$r == 0,]$r <- NA
|
|
X_Background[X_Background$AUC == 0,]$AUC <- NA
|
|
}
|
|
|
|
X_Background <- X_Background[!is.na(X_Background$l),]
|
|
|
|
#get summary stats for L, K, R, AUC
|
|
X_stats_BY_L <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise,
|
|
N = (length(l)),
|
|
mean = mean(l,na.rm=TRUE),
|
|
median = median(l,na.rm=TRUE),
|
|
max = max(l,na.rm=TRUE),
|
|
min = min(l,na.rm=TRUE),
|
|
sd = sd(l,na.rm=TRUE),
|
|
se = sd / sqrt(N-1)
|
|
)
|
|
print(X_stats_BY_L)
|
|
X1_SD <- max(X_stats_BY_L$sd)
|
|
|
|
X_stats_BY_K <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise,
|
|
N = (length(K)),
|
|
mean = mean(K,na.rm=TRUE),
|
|
median = median(K,na.rm=TRUE),
|
|
max = max(K,na.rm=TRUE),
|
|
min = min(K,na.rm=TRUE),
|
|
sd = sd(K,na.rm=TRUE),
|
|
se = sd / sqrt(N-1)
|
|
)
|
|
|
|
X1_SD_K <- max(X_stats_BY_K$sd)
|
|
|
|
|
|
X_stats_BY_r <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise,
|
|
N = length(r),
|
|
mean = mean(r,na.rm=TRUE),
|
|
median = median(r,na.rm=TRUE),
|
|
max = max(r,na.rm=TRUE),
|
|
min = min(r,na.rm=TRUE),
|
|
sd = sd(r,na.rm=TRUE),
|
|
se = sd / sqrt(N-1)
|
|
)
|
|
|
|
X1_SD_r <- max(X_stats_BY_r$sd)
|
|
|
|
X_stats_BY_AUC <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise,
|
|
N = length(AUC),
|
|
mean = mean(AUC,na.rm=TRUE),
|
|
median = median(AUC,na.rm=TRUE),
|
|
max = max(AUC,na.rm=TRUE),
|
|
min = min(AUC,na.rm=TRUE),
|
|
sd = sd(AUC,na.rm=TRUE),
|
|
se = sd / sqrt(N-1)
|
|
)
|
|
|
|
X1_SD_AUC <- max(X_stats_BY_AUC$sd)
|
|
|
|
X_stats_BY <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise,
|
|
N = (length(l)),
|
|
mean_L = mean(l,na.rm=TRUE),
|
|
median_L = median(l,na.rm=TRUE),
|
|
max_L = max(l,na.rm=TRUE),
|
|
min_L = min(l,na.rm=TRUE),
|
|
sd_L = sd(l,na.rm=TRUE),
|
|
se_L = sd_L / sqrt(N-1),
|
|
mean_K = mean(K,na.rm=TRUE),
|
|
median_K = median(K,na.rm=TRUE),
|
|
max_K = max(K,na.rm=TRUE),
|
|
min_K = min(K,na.rm=TRUE),
|
|
sd_K = sd(K,na.rm=TRUE),
|
|
se_K = sd_K / sqrt(N-1),
|
|
mean_r = mean(r,na.rm=TRUE),
|
|
median_r = median(r,na.rm=TRUE),
|
|
max_r = max(r,na.rm=TRUE),
|
|
min_r = min(r,na.rm=TRUE),
|
|
sd_r = sd(r,na.rm=TRUE),
|
|
se_r = sd_r / sqrt(N-1),
|
|
mean_AUC = mean(AUC,na.rm=TRUE),
|
|
median_AUC = median(AUC,na.rm=TRUE),
|
|
max_AUC = max(AUC,na.rm=TRUE),
|
|
min_L = min(AUC,na.rm=TRUE),
|
|
sd_AUC = sd(AUC,na.rm=TRUE),
|
|
se_AUC = sd_AUC / sqrt(N-1)
|
|
)
|
|
|
|
write.csv(X_stats_BY,file=paste(outDir,"SummaryStats_BackgroundStrains.csv"),row.names=FALSE)
|
|
|
|
#calculate the max theoretical L values
|
|
#only look for max values when K is within 2SD of the ref strain
|
|
for(q in unique(X$Conc_Num_Factor)){
|
|
if(q == 0){
|
|
X_within_2SD_K <- X[X$Conc_Num_Factor == q,]
|
|
X_within_2SD_K <- X_within_2SD_K[!is.na(X_within_2SD_K$l),]
|
|
X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,]
|
|
X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) ,]
|
|
X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,]
|
|
|
|
X_outside_2SD_K <- X[X$Conc_Num_Factor == q,]
|
|
X_outside_2SD_K <- X_outside_2SD_K[!is.na(X_outside_2SD_K$l),]
|
|
#X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,]
|
|
X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,]
|
|
#X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,]
|
|
|
|
}
|
|
if(q > 0){
|
|
X_within_2SD_K_temp <- X[X$Conc_Num_Factor == q,]
|
|
X_within_2SD_K_temp <- X_within_2SD_K_temp[!is.na(X_within_2SD_K_temp$l),]
|
|
X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,]
|
|
X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])),]
|
|
X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])),]
|
|
X_within_2SD_K <- rbind(X_within_2SD_K,X_within_2SD_K_temp)
|
|
|
|
X_outside_2SD_K_temp <- X[X$Conc_Num_Factor == q,]
|
|
X_outside_2SD_K_temp <- X_outside_2SD_K_temp[!is.na(X_outside_2SD_K_temp$l),]
|
|
#X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,]
|
|
X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,]
|
|
#X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,]
|
|
X_outside_2SD_K <- rbind(X_outside_2SD_K,X_outside_2SD_K_temp)
|
|
}
|
|
}
|
|
|
|
X_stats_BY_L_within_2SD_K <- ddply(X_within_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise,
|
|
N = (length(l)),
|
|
mean = mean(l),
|
|
median = median(l),
|
|
max = max(l,na.rm=TRUE),
|
|
min = min(l,na.rm=TRUE),
|
|
sd = sd(l),
|
|
se = sd / sqrt(N-1),
|
|
z_max = (max-mean)/sd
|
|
)
|
|
print(X_stats_BY_L_within_2SD_K)
|
|
X1_SD_within_2SD_K <- max(X_stats_BY_L_within_2SD_K$sd)
|
|
write.csv(X_stats_BY_L_within_2SD_K,file=paste(outDir_QC,"Max_Observed_L_Vals_for_spots_within_2SD_K.csv",sep=""),row.names=FALSE)
|
|
|
|
X_stats_BY_L_outside_2SD_K <- ddply(X_outside_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise,
|
|
N = (length(l)),
|
|
mean = mean(l),
|
|
median = median(l),
|
|
max = max(l,na.rm=TRUE),
|
|
min = min(l,na.rm=TRUE),
|
|
sd = sd(l),
|
|
se = sd / sqrt(N-1)
|
|
)
|
|
print(X_stats_BY_L_outside_2SD_K)
|
|
X1_SD_outside_2SD_K <- max(X_stats_BY_L_outside_2SD_K$sd)
|
|
|
|
#X1_SD_outside_2SD_K <- X[X$l %in% X1_SD_within_2SD_K$l,]
|
|
Outside_2SD_K_L_vs_K <- ggplot(X_outside_2SD_K,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) +
|
|
ggtitle("Raw L vs K for strains falling outside 2SD of the K mean at each conc") + theme_Publication_legend_right()
|
|
pdf(paste(outDir_QC,"Raw_L_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8)
|
|
print(Outside_2SD_K_L_vs_K)
|
|
dev.off()
|
|
pgg <- ggplotly(Outside_2SD_K_L_vs_K)
|
|
plotly_path <- paste(outDir_QC,"RawL_vs_K_for_strains_outside_2SD_K.html",sep="")
|
|
saveWidget(pgg, file=plotly_path, selfcontained =TRUE)
|
|
|
|
|
|
Outside_2SD_K_delta_background_vs_K <- ggplot(X_outside_2SD_K,aes(Delta_Backgrd,K,color=as.factor(Conc_Num))) + geom_point(aes(l=l,ORF=ORF,Gene=Gene),shape=3,position="jitter") +
|
|
ggtitle("DeltaBackground vs K for strains falling outside 2SD of the K mean at each conc") + theme_Publication_legend_right()
|
|
pdf(paste(outDir_QC,"DeltaBackground_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8)
|
|
Outside_2SD_K_delta_background_vs_K
|
|
dev.off()
|
|
pgg <- ggplotly(Outside_2SD_K_delta_background_vs_K)
|
|
#pgg
|
|
plotly_path <- paste(outDir_QC,"DeltaBackground_vs_K_for_strains_outside_2SD_K.html",sep="")
|
|
saveWidget(pgg, file=plotly_path, selfcontained =TRUE)
|
|
|
|
|
|
#get the background strain mean values at the no drug conc to calculate shift
|
|
Background_L <- X_stats_BY_L$mean[1]
|
|
Background_K <- X_stats_BY_K$mean[1]
|
|
Background_r <- X_stats_BY_r$mean[1]
|
|
Background_AUC <- X_stats_BY_AUC$mean[1]
|
|
|
|
#create empty plots for plotting element
|
|
p_l <- ggplot()
|
|
p_K <- ggplot()
|
|
p_r <- ggplot()
|
|
p_AUC <- ggplot()
|
|
|
|
p_rf_l <- ggplot()
|
|
p_rf_K <- ggplot()
|
|
p_rf_r <- ggplot()
|
|
p_rf_AUC <- ggplot()
|
|
|
|
#get only the deletion strains
|
|
X2 <- X
|
|
X2 <- X2[X2$OrfRep != "YDL227C",]
|
|
|
|
#if set to max theoretical value, add a 1 to SM, if not, leave as 0
|
|
#SM = Set to Max
|
|
X2$SM <- 0
|
|
#set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min
|
|
for(i in 1:length(unique(X2$Conc_Num))){
|
|
Concentration <- unique(X2$Conc_Num)[i]
|
|
X2_temp <- X2[X2$Conc_Num == Concentration,]
|
|
if(Concentration == 0){
|
|
X2_new <- X2_temp
|
|
print(paste("Check loop order, conc =",Concentration,sep=" "))
|
|
}
|
|
if(Concentration > 0){
|
|
try(X2_temp[X2_temp$l == 0 & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i])
|
|
try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$SM <- 1)
|
|
try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i])
|
|
|
|
#X2_temp[X2_temp$K == 0,]$K <- X_stats_ALL_K$max[i]
|
|
#X2_temp[X2_temp$r == 0,]$r <- X_stats_ALL_r$max[i]
|
|
#X2_temp[X2_temp$AUC == 0,]$AUC <- X_stats_ALL_AUC$max[i]
|
|
print(paste("Check loop order, conc =",Concentration,sep=" "))
|
|
|
|
X2_new <- rbind(X2_new,X2_temp)
|
|
|
|
}
|
|
}
|
|
X2 <- X2_new
|
|
|
|
|
|
#get only the RF strains
|
|
X2_RF <- X
|
|
X2_RF <- X2_RF[X2_RF$OrfRep == "YDL227C",]
|
|
#if set to max theoretical value, add a 1 to SM, if not, leave as 0
|
|
#SM = Set to Max
|
|
X2_RF$SM <- 0
|
|
#set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min
|
|
for(i in 1:length(unique(X2_RF$Conc_Num))){
|
|
Concentration <- unique(X2_RF$Conc_Num)[i]
|
|
X2_RF_temp <- X2_RF[X2_RF$Conc_Num == Concentration,]
|
|
if(Concentration == 0){
|
|
X2_RF_new <- X2_RF_temp
|
|
print(paste("Check loop order, conc =",Concentration,sep=" "))
|
|
}
|
|
if(Concentration > 0){
|
|
try(X2_RF_temp[X2_RF_temp$l == 0 & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i])
|
|
try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$SM <- 1)
|
|
try(X2_RF_temp[X2_RF_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i])
|
|
print(paste("Check loop order, if error, refs have no L values outside theoretical max L, for REFs, conc =",Concentration,sep=" "))
|
|
|
|
X2_RF_new <- rbind(X2_RF_new,X2_RF_temp)
|
|
|
|
}
|
|
}
|
|
X2_RF <- X2_RF_new
|
|
|
|
|
|
#######Part 4 Get the RF Z score values
|
|
#Change the OrfRep Column to include the RF strain, the Gene name and the Num. so each RF gets its own score
|
|
X2_RF$OrfRep <- paste(X2_RF$OrfRep,X2_RF$Gene,X2_RF$Num.,sep="_")
|
|
|
|
num_genes_RF <- length(unique(X2_RF$OrfRep))
|
|
print(num_genes_RF)
|
|
|
|
|
|
#create the output data.frame containing columns for each RF strain
|
|
InteractionScores_RF <- unique(X2_RF["OrfRep"])
|
|
#InteractionScores_RF$Gene <- unique(X2$Gene)
|
|
InteractionScores_RF$Gene <- NA
|
|
InteractionScores_RF$Raw_Shift_L <- NA
|
|
InteractionScores_RF$Z_Shift_L <- NA
|
|
InteractionScores_RF$lm_Score_L <- NA
|
|
InteractionScores_RF$Z_lm_L <- NA
|
|
InteractionScores_RF$R_Squared_L <- NA
|
|
InteractionScores_RF$Sum_Z_Score_L <- NA
|
|
InteractionScores_RF$Avg_Zscore_L <- NA
|
|
InteractionScores_RF$Raw_Shift_K <- NA
|
|
InteractionScores_RF$Z_Shift_K <- NA
|
|
InteractionScores_RF$lm_Score_K <- NA
|
|
InteractionScores_RF$Z_lm_K <- NA
|
|
InteractionScores_RF$R_Squared_K <- NA
|
|
InteractionScores_RF$Sum_Z_Score_K <- NA
|
|
InteractionScores_RF$Avg_Zscore_K <- NA
|
|
InteractionScores_RF$Raw_Shift_r <- NA
|
|
InteractionScores_RF$Z_Shift_r <- NA
|
|
InteractionScores_RF$lm_Score_r <- NA
|
|
InteractionScores_RF$Z_lm_r <- NA
|
|
InteractionScores_RF$R_Squared_r <- NA
|
|
InteractionScores_RF$Sum_Z_Score_r <- NA
|
|
InteractionScores_RF$Avg_Zscore_r <- NA
|
|
InteractionScores_RF$Raw_Shift_AUC <- NA
|
|
InteractionScores_RF$Z_Shift_AUC <- NA
|
|
InteractionScores_RF$lm_Score_AUC <- NA
|
|
InteractionScores_RF$Z_lm_AUC <- NA
|
|
InteractionScores_RF$R_Squared_AUC <- NA
|
|
InteractionScores_RF$Sum_Z_Score_AUC <- NA
|
|
InteractionScores_RF$Avg_Zscore_AUC <- NA
|
|
InteractionScores_RF$NG <- NA
|
|
InteractionScores_RF$SM <- NA
|
|
|
|
|
|
for(i in 1:num_genes_RF){
|
|
#get each deletion strain ORF
|
|
Gene_Sel <- unique(X2_RF$OrfRep)[i]
|
|
#extract only the current deletion strain and its data
|
|
X_Gene_Sel <- X2_RF[X2_RF$OrfRep == Gene_Sel,]
|
|
|
|
X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise,
|
|
N = (length(l)),
|
|
mean_L = mean(l,na.rm = TRUE),
|
|
median_L = median(l,na.rm = TRUE),
|
|
sd_L = sd(l,na.rm = TRUE),
|
|
se_L = sd_L / sqrt(N-1),
|
|
mean_K = mean(K,na.rm = TRUE),
|
|
median_K = median(K,na.rm = TRUE),
|
|
sd_K = sd(K,na.rm = TRUE),
|
|
se_K = sd_K / sqrt(N-1),
|
|
mean_r = mean(r,na.rm = TRUE),
|
|
median_r = median(r,na.rm = TRUE),
|
|
sd_r = sd(r,na.rm = TRUE),
|
|
se_r = sd_r / sqrt(N-1),
|
|
mean_AUC = mean(AUC,na.rm = TRUE),
|
|
median_AUC = median(AUC,na.rm = TRUE),
|
|
sd_AUC = sd(AUC,na.rm = TRUE),
|
|
se_AUC = sd_AUC / sqrt(N-1),
|
|
NG = sum(NG,na.rm=TRUE),
|
|
DB= sum(DB,na.rm=TRUE),
|
|
SM = sum(SM,na.rm=TRUE)
|
|
)
|
|
|
|
|
|
#Get shift vals
|
|
#if L is 0, that means the no growth on no drug
|
|
#if L is NA at 0, that means the spot was removed due to contamination
|
|
#if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift
|
|
#otherwise calculate shift at no drug conc
|
|
if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){
|
|
X_stats_interaction$Raw_Shift_L <- 0
|
|
X_stats_interaction$Raw_Shift_K <- 0
|
|
X_stats_interaction$Raw_Shift_r <- 0
|
|
X_stats_interaction$Raw_Shift_AUC <- 0
|
|
X_stats_interaction$Z_Shift_L <- 0
|
|
X_stats_interaction$Z_Shift_K <- 0
|
|
X_stats_interaction$Z_Shift_r <- 0
|
|
X_stats_interaction$Z_Shift_AUC <- 0
|
|
}else{
|
|
X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L
|
|
X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K
|
|
X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r
|
|
X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC
|
|
X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1]
|
|
X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1]
|
|
X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1]
|
|
X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1]
|
|
}
|
|
|
|
|
|
#get WT vals
|
|
X_stats_interaction$WT_l <- X_stats_BY_L$mean
|
|
X_stats_interaction$WT_K <- X_stats_BY_K$mean
|
|
X_stats_interaction$WT_r <- X_stats_BY_r$mean
|
|
X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean
|
|
|
|
#Get WT SD
|
|
X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd
|
|
X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd
|
|
X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd
|
|
X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd
|
|
|
|
|
|
#only get scores if there's growth at no drug
|
|
if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){
|
|
|
|
#calculate expected values
|
|
X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L
|
|
X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K
|
|
X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r
|
|
X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC
|
|
|
|
#calculate normalized delta values
|
|
X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L
|
|
X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K
|
|
X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r
|
|
X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC
|
|
|
|
#disregard shift for no growth values in Z score calculation
|
|
if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){
|
|
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l
|
|
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K
|
|
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r
|
|
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC
|
|
|
|
}
|
|
#disregard shift for set to max values in Z score calculation
|
|
if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){
|
|
X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l
|
|
#only calculate the L value without shift since L is the only adjusted value
|
|
#X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K
|
|
#X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r
|
|
#X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC
|
|
|
|
}
|
|
|
|
|
|
|
|
#calculate Z score at each concentration
|
|
X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l)
|
|
X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K)
|
|
X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r)
|
|
X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC)
|
|
|
|
#get linear model
|
|
gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction)
|
|
gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction)
|
|
gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction)
|
|
gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction)
|
|
|
|
#get interaction score calculated by linear model and R-squared value for the fit
|
|
gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1]
|
|
r_squared_l <- summary(gene_lm_L)$r.squared
|
|
gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1]
|
|
r_squared_K <- summary(gene_lm_K)$r.squared
|
|
gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1]
|
|
r_squared_r <- summary(gene_lm_r)$r.squared
|
|
gene_interaction_AUC <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_AUC$coefficients[1]
|
|
r_squared_AUC <- summary(gene_lm_AUC)$r.squared
|
|
|
|
#Get total of non removed values
|
|
Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1
|
|
|
|
#report the scores
|
|
InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1]
|
|
InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1]
|
|
|
|
InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1]
|
|
InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1]
|
|
InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L
|
|
InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l
|
|
InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)
|
|
InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc)
|
|
|
|
|
|
InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1]
|
|
InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1]
|
|
InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K
|
|
InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K
|
|
InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)
|
|
InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc)
|
|
|
|
InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1]
|
|
InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1]
|
|
InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r
|
|
InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r
|
|
InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)
|
|
InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1)
|
|
|
|
InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1]
|
|
InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1]
|
|
InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC
|
|
InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC
|
|
InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)
|
|
InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1)
|
|
}
|
|
|
|
if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){
|
|
|
|
#calculate expected values
|
|
X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L
|
|
X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K
|
|
X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r
|
|
X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC
|
|
|
|
#calculate normalized delta values
|
|
X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L
|
|
X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K
|
|
X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r
|
|
X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC
|
|
|
|
#disregard shift for missing values in Z score calculatiom
|
|
if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){
|
|
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l
|
|
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K
|
|
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r
|
|
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC
|
|
}
|
|
#disregard shift for set to max values in Z score calculation
|
|
if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){
|
|
X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l
|
|
#only calculate the L value without shift since L is the only adjusted value
|
|
#X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K
|
|
#X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r
|
|
#X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC
|
|
|
|
}
|
|
|
|
|
|
#calculate Z score at each concentration
|
|
X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l)
|
|
X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K)
|
|
X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r)
|
|
X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC)
|
|
|
|
#NA values for the next part since there's an NA or 0 at the no drug.
|
|
gene_lm_L <- NA
|
|
gene_lm_K <- NA
|
|
gene_lm_r <- NA
|
|
gene_lm_AUC <- NA
|
|
|
|
gene_interaction_L <- NA
|
|
r_squared_l <- NA
|
|
gene_interaction_K <- NA
|
|
r_squared_K <- NA
|
|
gene_interaction_r <- NA
|
|
r_squared_r <- NA
|
|
gene_interaction_AUC <- NA
|
|
r_squared_AUC <- NA
|
|
|
|
X_stats_interaction$Raw_Shift_L <- NA
|
|
X_stats_interaction$Raw_Shift_K <- NA
|
|
X_stats_interaction$Raw_Shift_r <- NA
|
|
X_stats_interaction$Raw_Shift_AUC <- NA
|
|
|
|
X_stats_interaction$Z_Shift_L <- NA
|
|
X_stats_interaction$Z_Shift_K <- NA
|
|
X_stats_interaction$Z_Shift_r <- NA
|
|
X_stats_interaction$Z_Shift_AUC <- NA
|
|
|
|
InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1]
|
|
InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1]
|
|
|
|
InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1]
|
|
InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1]
|
|
InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L
|
|
InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l
|
|
InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA
|
|
InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA
|
|
|
|
InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1]
|
|
InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1]
|
|
InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K
|
|
InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K
|
|
InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA
|
|
InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA
|
|
|
|
InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1]
|
|
InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1]
|
|
InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r
|
|
InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r
|
|
InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA
|
|
InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA
|
|
|
|
InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1]
|
|
InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1]
|
|
InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC
|
|
InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC
|
|
InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA
|
|
InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA
|
|
}
|
|
|
|
if(i == 1){
|
|
X_stats_interaction_ALL_RF <- X_stats_interaction
|
|
}
|
|
if(i > 1){
|
|
X_stats_interaction_ALL_RF <- rbind(X_stats_interaction_ALL_RF,X_stats_interaction)
|
|
}
|
|
|
|
InteractionScores_RF$NG[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE)
|
|
InteractionScores_RF$DB[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE)
|
|
InteractionScores_RF$SM[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE)
|
|
|
|
#X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int)
|
|
|
|
|
|
}
|
|
print("Pass RF Calculation loop")
|
|
|
|
lm_sd_L <- sd(InteractionScores_RF$lm_Score_L,na.rm=TRUE)
|
|
lm_sd_K <- sd(InteractionScores_RF$lm_Score_K,na.rm=TRUE)
|
|
lm_sd_r <- sd(InteractionScores_RF$lm_Score_r,na.rm=TRUE)
|
|
lm_sd_AUC <- sd(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE)
|
|
|
|
lm_mean_L <- mean(InteractionScores_RF$lm_Score_L,na.rm=TRUE)
|
|
lm_mean_K <- mean(InteractionScores_RF$lm_Score_K,na.rm=TRUE)
|
|
lm_mean_r <- mean(InteractionScores_RF$lm_Score_r,na.rm=TRUE)
|
|
lm_mean_AUC <- mean(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE)
|
|
|
|
print(paste("Mean RF linear regression score L",lm_mean_L))
|
|
|
|
|
|
InteractionScores_RF$Z_lm_L <- (InteractionScores_RF$lm_Score_L - lm_mean_L)/(lm_sd_L)
|
|
InteractionScores_RF$Z_lm_K <- (InteractionScores_RF$lm_Score_K - lm_mean_K)/(lm_sd_K)
|
|
InteractionScores_RF$Z_lm_r <- (InteractionScores_RF$lm_Score_r - lm_mean_r)/(lm_sd_r)
|
|
InteractionScores_RF$Z_lm_AUC <- (InteractionScores_RF$lm_Score_AUC - lm_mean_AUC)/(lm_sd_AUC)
|
|
|
|
|
|
InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$Z_lm_L,decreasing=TRUE),]
|
|
InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$NG,decreasing=TRUE),]
|
|
write.csv(InteractionScores_RF,paste(outDir,"RF_ZScores_Interaction.csv",sep=""),row.names=FALSE)
|
|
|
|
|
|
for(i in 1:num_genes_RF){
|
|
Gene_Sel <- unique(InteractionScores_RF$OrfRep)[i]
|
|
X_ZCalculations <- X_stats_interaction_ALL_RF[X_stats_interaction_ALL_RF$OrfRep == Gene_Sel,]
|
|
X_Int_Scores <- InteractionScores_RF[InteractionScores_RF$OrfRep == Gene_Sel,]
|
|
|
|
p_rf_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) +
|
|
coord_cartesian(ylim = c(-65,65)) +
|
|
geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) +
|
|
ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) +
|
|
annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) +
|
|
#annotate("text",x=1,y=35,label = paste("Avg Zscore =",round(X_Int_Scores$Avg_Zscore_L,2))) +
|
|
annotate("text",x=1,y=25,label = paste("lm Zscore =",round(X_Int_Scores$Z_lm_L,2))) +
|
|
annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) +
|
|
annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) +
|
|
annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) +
|
|
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) +
|
|
scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) +
|
|
theme_Publication()
|
|
|
|
p_rf_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) +
|
|
coord_cartesian(ylim = c(-65,65)) +
|
|
geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) +
|
|
ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) +
|
|
annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) +
|
|
#annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) +
|
|
annotate("text",x=1,y=25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_L,2))) +
|
|
annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) +
|
|
annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) +
|
|
annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) +
|
|
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) +
|
|
scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) +
|
|
theme_Publication()
|
|
|
|
p_rf_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) +
|
|
coord_cartesian(ylim = c(-0.65,0.65)) +
|
|
geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) +
|
|
ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) +
|
|
annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) +
|
|
#annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) +
|
|
annotate("text",x=1,y=0.25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_r,2))) +
|
|
annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) +
|
|
annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) +
|
|
annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) +
|
|
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) +
|
|
scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) +
|
|
theme_Publication()
|
|
|
|
p_rf_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) +
|
|
coord_cartesian(ylim = c(-6500,6500)) +
|
|
geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) +
|
|
ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) +
|
|
annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) +
|
|
#annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) +
|
|
annotate("text",x=1,y=2500,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_AUC,2))) +
|
|
annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) +
|
|
annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) +
|
|
annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) +
|
|
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) +
|
|
scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) +
|
|
theme_Publication()
|
|
|
|
|
|
|
|
if(i == 1){
|
|
X_stats_interaction_ALL_RF_final <- X_ZCalculations
|
|
}
|
|
if(i > 1){
|
|
X_stats_interaction_ALL_RF_final <- rbind(X_stats_interaction_ALL_RF_final,X_ZCalculations)
|
|
}
|
|
}
|
|
print("Pass RF ggplot loop")
|
|
write.csv(X_stats_interaction_ALL_RF_final,paste(outDir,"RF_ZScore_Calculations.csv",sep=""),row.names = FALSE)
|
|
|
|
|
|
####### Part 5 - Get Zscores for Gene deletion strains
|
|
|
|
|
|
|
|
#get total number of genes for the next loop
|
|
num_genes <- length(unique(X2$OrfRep))
|
|
print(num_genes)
|
|
|
|
#create the output data.frame containing columns for each deletion strain
|
|
InteractionScores <- unique(X2["OrfRep"])
|
|
#InteractionScores$Gene <- unique(X2$Gene)
|
|
InteractionScores$Gene <- NA
|
|
InteractionScores$Raw_Shift_L <- NA
|
|
InteractionScores$Z_Shift_L <- NA
|
|
InteractionScores$lm_Score_L <- NA
|
|
InteractionScores$Z_lm_L <- NA
|
|
InteractionScores$R_Squared_L <- NA
|
|
InteractionScores$Sum_Z_Score_L <- NA
|
|
InteractionScores$Avg_Zscore_L <- NA
|
|
InteractionScores$Raw_Shift_K <- NA
|
|
InteractionScores$Z_Shift_K <- NA
|
|
InteractionScores$lm_Score_K <- NA
|
|
InteractionScores$Z_lm_K <- NA
|
|
InteractionScores$R_Squared_K <- NA
|
|
InteractionScores$Sum_Z_Score_K <- NA
|
|
InteractionScores$Avg_Zscore_K <- NA
|
|
InteractionScores$Raw_Shift_r <- NA
|
|
InteractionScores$Z_Shift_r <- NA
|
|
InteractionScores$lm_Score_r <- NA
|
|
InteractionScores$Z_lm_r <- NA
|
|
InteractionScores$R_Squared_r <- NA
|
|
InteractionScores$Sum_Z_Score_r <- NA
|
|
InteractionScores$Avg_Zscore_r <- NA
|
|
InteractionScores$Raw_Shift_AUC <- NA
|
|
InteractionScores$Z_Shift_AUC <- NA
|
|
InteractionScores$lm_Score_AUC <- NA
|
|
InteractionScores$Z_lm_AUC <- NA
|
|
InteractionScores$R_Squared_AUC <- NA
|
|
InteractionScores$Sum_Z_Score_AUC <- NA
|
|
InteractionScores$Avg_Zscore_AUC <- NA
|
|
InteractionScores$NG <- NA
|
|
InteractionScores$DB <- NA
|
|
InteractionScores$SM <- NA
|
|
|
|
for(i in 1:num_genes){
|
|
#get each deletion strain ORF
|
|
Gene_Sel <- unique(X2$OrfRep)[i]
|
|
#extract only the current deletion strain and its data
|
|
X_Gene_Sel <- X2[X2$OrfRep == Gene_Sel,]
|
|
|
|
X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise,
|
|
N = (length(l)),
|
|
mean_L = mean(l,na.rm = TRUE),
|
|
median_L = median(l,na.rm = TRUE),
|
|
sd_L = sd(l,na.rm = TRUE),
|
|
se_L = sd_L / sqrt(N-1),
|
|
mean_K = mean(K,na.rm = TRUE),
|
|
median_K = median(K,na.rm = TRUE),
|
|
sd_K = sd(K,na.rm = TRUE),
|
|
se_K = sd_K / sqrt(N-1),
|
|
mean_r = mean(r,na.rm = TRUE),
|
|
median_r = median(r,na.rm = TRUE),
|
|
sd_r = sd(r,na.rm = TRUE),
|
|
se_r = sd_r / sqrt(N-1),
|
|
mean_AUC = mean(AUC,na.rm = TRUE),
|
|
median_AUC = median(AUC,na.rm = TRUE),
|
|
sd_AUC = sd(AUC,na.rm = TRUE),
|
|
se_AUC = sd_AUC / sqrt(N-1),
|
|
NG = sum(NG,na.rm=TRUE),
|
|
DB= sum(DB,na.rm=TRUE),
|
|
SM = sum(SM,na.rm=TRUE)
|
|
)
|
|
|
|
|
|
#Get shift vals
|
|
#if L is 0, that means the no growth on no drug
|
|
#if L is NA at 0, that means the spot was removed due to contamination
|
|
#if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift
|
|
#otherwise calculate shift at no drug conc
|
|
if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){
|
|
X_stats_interaction$Raw_Shift_L <- 0
|
|
X_stats_interaction$Raw_Shift_K <- 0
|
|
X_stats_interaction$Raw_Shift_r <- 0
|
|
X_stats_interaction$Raw_Shift_AUC <- 0
|
|
X_stats_interaction$Z_Shift_L <- 0
|
|
X_stats_interaction$Z_Shift_K <- 0
|
|
X_stats_interaction$Z_Shift_r <- 0
|
|
X_stats_interaction$Z_Shift_AUC <- 0
|
|
}else{
|
|
X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L
|
|
X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K
|
|
X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r
|
|
X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC
|
|
X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1]
|
|
X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1]
|
|
X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1]
|
|
X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1]
|
|
}
|
|
|
|
|
|
#get WT vals
|
|
X_stats_interaction$WT_l <- X_stats_BY_L$mean
|
|
X_stats_interaction$WT_K <- X_stats_BY_K$mean
|
|
X_stats_interaction$WT_r <- X_stats_BY_r$mean
|
|
X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean
|
|
|
|
#Get WT SD
|
|
X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd
|
|
X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd
|
|
X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd
|
|
X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd
|
|
|
|
|
|
#only get scores if there's growth at no drug
|
|
if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){
|
|
|
|
#calculate expected values
|
|
X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L
|
|
X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K
|
|
X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r
|
|
X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC
|
|
|
|
#calculate normalized delta values
|
|
X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L
|
|
X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K
|
|
X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r
|
|
X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC
|
|
|
|
#disregard shift for no growth values in Z score calculation
|
|
if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){
|
|
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l
|
|
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K
|
|
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r
|
|
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC
|
|
|
|
}
|
|
#disregard shift for set to max values in Z score calculation
|
|
if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){
|
|
X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l
|
|
#only calculate the L value without shift since L is the only adjusted value
|
|
#X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K
|
|
#X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r
|
|
#X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC
|
|
|
|
}
|
|
|
|
|
|
|
|
#calculate Z score at each concentration
|
|
X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l)
|
|
X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K)
|
|
X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r)
|
|
X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC)
|
|
|
|
#get linear model
|
|
gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction)
|
|
gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction)
|
|
gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction)
|
|
gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction)
|
|
|
|
#get interaction score calculated by linear model and R-squared value for the fit
|
|
gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1]
|
|
r_squared_l <- summary(gene_lm_L)$r.squared
|
|
gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1]
|
|
r_squared_K <- summary(gene_lm_K)$r.squared
|
|
gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1]
|
|
r_squared_r <- summary(gene_lm_r)$r.squared
|
|
gene_interaction_AUC <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_AUC$coefficients[1]
|
|
r_squared_AUC <- summary(gene_lm_AUC)$r.squared
|
|
|
|
#Get total of non removed values
|
|
Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1
|
|
|
|
#report the scores
|
|
InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1])
|
|
InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1])
|
|
|
|
InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1]
|
|
InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1]
|
|
InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_L
|
|
InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_L-lm_mean_L)/lm_sd_L
|
|
InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l
|
|
InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)
|
|
InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc)
|
|
|
|
|
|
InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1]
|
|
InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1]
|
|
InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_K
|
|
InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_K-lm_mean_K)/lm_sd_K
|
|
InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K
|
|
InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)
|
|
InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc)
|
|
|
|
InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1]
|
|
InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1]
|
|
InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_r
|
|
InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_r-lm_mean_r)/lm_sd_r
|
|
InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r
|
|
InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)
|
|
InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1)
|
|
|
|
InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1]
|
|
InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1]
|
|
InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_AUC
|
|
InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_AUC-lm_mean_AUC)/lm_sd_AUC
|
|
InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC
|
|
InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)
|
|
InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1)
|
|
}
|
|
|
|
if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){
|
|
|
|
#calculate expected values
|
|
X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L
|
|
X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K
|
|
X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r
|
|
X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC
|
|
|
|
#calculate normalized delta values
|
|
X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L
|
|
X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K
|
|
X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r
|
|
X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC
|
|
|
|
#disregard shift for missing values in Z score calculatiom
|
|
if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){
|
|
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l
|
|
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K
|
|
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r
|
|
X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC
|
|
}
|
|
#disregard shift for set to max values in Z score calculation
|
|
if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){
|
|
X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l
|
|
#only calculate the L value without shift since L is the only adjusted value
|
|
#X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K
|
|
#X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r
|
|
#X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC
|
|
|
|
}
|
|
|
|
|
|
#calculate Z score at each concentration
|
|
X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l)
|
|
X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K)
|
|
X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r)
|
|
X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC)
|
|
|
|
#NA values for the next part since there's an NA or 0 at the no drug.
|
|
gene_lm_L <- NA
|
|
gene_lm_K <- NA
|
|
gene_lm_r <- NA
|
|
|
|
gene_interaction_L <- NA
|
|
r_squared_l <- NA
|
|
gene_interaction_K <- NA
|
|
r_squared_K <- NA
|
|
gene_interaction_r <- NA
|
|
r_squared_r <- NA
|
|
|
|
X_stats_interaction$Raw_Shift_L <- NA
|
|
X_stats_interaction$Raw_Shift_K <- NA
|
|
X_stats_interaction$Raw_Shift_r <- NA
|
|
X_stats_interaction$Raw_Shift_AUC <- NA
|
|
|
|
X_stats_interaction$Z_Shift_L <- NA
|
|
X_stats_interaction$Z_Shift_K <- NA
|
|
X_stats_interaction$Z_Shift_r <- NA
|
|
X_stats_interaction$Z_Shift_AUC <- NA
|
|
|
|
InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1])
|
|
InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1])
|
|
|
|
InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1]
|
|
InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1]
|
|
InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA
|
|
InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- NA
|
|
InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l
|
|
InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA
|
|
InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- NA
|
|
|
|
InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1]
|
|
InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1]
|
|
InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA
|
|
InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <-NA
|
|
InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K
|
|
InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA
|
|
InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- NA
|
|
|
|
InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1]
|
|
InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1]
|
|
InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA
|
|
InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- NA
|
|
InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r
|
|
InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA
|
|
InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- NA
|
|
|
|
InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1]
|
|
InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1]
|
|
InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA
|
|
InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA
|
|
InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC
|
|
InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA
|
|
InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA
|
|
}
|
|
|
|
if(i == 1){
|
|
X_stats_interaction_ALL <- X_stats_interaction
|
|
}
|
|
if(i > 1){
|
|
X_stats_interaction_ALL <- rbind(X_stats_interaction_ALL,X_stats_interaction)
|
|
}
|
|
|
|
InteractionScores$NG[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE)
|
|
InteractionScores$DB[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE)
|
|
InteractionScores$SM[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE)
|
|
|
|
#X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int)
|
|
|
|
|
|
}
|
|
print("Pass Int Calculation loop")
|
|
InteractionScores <- InteractionScores[order(InteractionScores$Z_lm_L,decreasing=TRUE),]
|
|
InteractionScores <- InteractionScores[order(InteractionScores$NG,decreasing=TRUE),]
|
|
df_order_by_OrfRep <- unique(InteractionScores$OrfRep)
|
|
#X_stats_interaction_ALL <- X_stats_interaction_ALL[order(X_stats_interaction_ALL$NG,decreasing=TRUE),]
|
|
write.csv(InteractionScores,paste(outDir,"ZScores_Interaction.csv",sep=""),row.names=FALSE)
|
|
|
|
InteractionScores_deletion_enhancers_L <- InteractionScores[InteractionScores$Avg_Zscore_L >= 2,]
|
|
InteractionScores_deletion_enhancers_K <- InteractionScores[InteractionScores$Avg_Zscore_K <= -2,]
|
|
InteractionScores_deletion_suppressors_L <- InteractionScores[InteractionScores$Avg_Zscore_L <= -2,]
|
|
InteractionScores_deletion_suppressors_K <- InteractionScores[InteractionScores$Avg_Zscore_K >= 2,]
|
|
InteractionScores_deletion_enhancers_and_Suppressors_L <- InteractionScores[InteractionScores$Avg_Zscore_L >= 2 | InteractionScores$Avg_Zscore_L <= -2,]
|
|
InteractionScores_deletion_enhancers_and_Suppressors_K <- InteractionScores[InteractionScores$Avg_Zscore_K >= 2 | InteractionScores$Avg_Zscore_K <= -2,]
|
|
InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- InteractionScores[InteractionScores$Z_lm_L >= 2 & InteractionScores$Avg_Zscore_L <= -2,]
|
|
InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- InteractionScores[InteractionScores$Z_lm_L <= -2 & InteractionScores$Avg_Zscore_L >= 2,]
|
|
InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- InteractionScores[InteractionScores$Z_lm_K <= -2 & InteractionScores$Avg_Zscore_K >= 2,]
|
|
InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- InteractionScores[InteractionScores$Z_lm_K >= 2 & InteractionScores$Avg_Zscore_K <= -2,]
|
|
|
|
InteractionScores_deletion_enhancers_L <- InteractionScores_deletion_enhancers_L[!is.na(InteractionScores_deletion_enhancers_L$OrfRep),]
|
|
InteractionScores_deletion_enhancers_K <- InteractionScores_deletion_enhancers_K[!is.na(InteractionScores_deletion_enhancers_K$OrfRep),]
|
|
InteractionScores_deletion_suppressors_L <- InteractionScores_deletion_suppressors_L[!is.na(InteractionScores_deletion_suppressors_L$OrfRep),]
|
|
InteractionScores_deletion_suppressors_K <- InteractionScores_deletion_suppressors_K[!is.na(InteractionScores_deletion_suppressors_K$OrfRep),]
|
|
InteractionScores_deletion_enhancers_and_Suppressors_L <- InteractionScores_deletion_enhancers_and_Suppressors_L[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L$OrfRep),]
|
|
InteractionScores_deletion_enhancers_and_Suppressors_K <- InteractionScores_deletion_enhancers_and_Suppressors_K[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K$OrfRep),]
|
|
InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L[!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L$OrfRep),]
|
|
InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L[!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L$OrfRep),]
|
|
InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K[!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K$OrfRep),]
|
|
InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K[!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K$OrfRep),]
|
|
|
|
write.csv(InteractionScores_deletion_enhancers_L,paste(outDir,"ZScores_Interaction_DeletionEnhancers_L.csv",sep=""),row.names=FALSE)
|
|
write.csv(InteractionScores_deletion_enhancers_K,paste(outDir,"ZScores_Interaction_DeletionEnhancers_K.csv",sep=""),row.names=FALSE)
|
|
write.csv(InteractionScores_deletion_suppressors_L,paste(outDir,"ZScores_Interaction_DeletionSuppressors_L.csv",sep=""),row.names=FALSE)
|
|
write.csv(InteractionScores_deletion_suppressors_K,paste(outDir,"ZScores_Interaction_DeletionSuppressors_K.csv",sep=""),row.names=FALSE)
|
|
write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L,paste(outDir,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv",sep=""),row.names=FALSE)
|
|
write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K,paste(outDir,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv",sep=""),row.names=FALSE)
|
|
write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L,paste(outDir,"ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv",sep=""),row.names=FALSE)
|
|
write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L,paste(outDir,"ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv",sep=""),row.names=FALSE)
|
|
write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K,paste(outDir,"ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv",sep=""),row.names=FALSE)
|
|
write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K,paste(outDir,"ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv",sep=""),row.names=FALSE)
|
|
|
|
#get enhancers and suppressors for linear regression
|
|
InteractionScores_deletion_enhancers_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2,]
|
|
InteractionScores_deletion_enhancers_K_lm <- InteractionScores[InteractionScores$Z_lm_K <= -2,]
|
|
InteractionScores_deletion_suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L <= -2,]
|
|
InteractionScores_deletion_suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2,]
|
|
InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2 | InteractionScores$Z_lm_L <= -2,]
|
|
InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2 | InteractionScores$Z_lm_K <= -2,]
|
|
|
|
InteractionScores_deletion_enhancers_L_lm <- InteractionScores_deletion_enhancers_L_lm[!is.na(InteractionScores_deletion_enhancers_L_lm$OrfRep),]
|
|
InteractionScores_deletion_enhancers_K_lm <- InteractionScores_deletion_enhancers_K_lm[!is.na(InteractionScores_deletion_enhancers_K_lm$OrfRep),]
|
|
InteractionScores_deletion_suppressors_L_lm <- InteractionScores_deletion_suppressors_L_lm[!is.na(InteractionScores_deletion_suppressors_L_lm$OrfRep),]
|
|
InteractionScores_deletion_suppressors_K_lm <- InteractionScores_deletion_suppressors_K_lm[!is.na(InteractionScores_deletion_suppressors_K_lm$OrfRep),]
|
|
InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores_deletion_enhancers_and_Suppressors_L_lm[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L_lm$OrfRep),]
|
|
InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores_deletion_enhancers_and_Suppressors_K_lm[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K_lm$OrfRep),]
|
|
|
|
write.csv(InteractionScores_deletion_enhancers_L_lm,paste(outDir,"ZScores_Interaction_DeletionEnhancers_L_lm.csv",sep=""),row.names=FALSE)
|
|
write.csv(InteractionScores_deletion_enhancers_K_lm,paste(outDir,"ZScores_Interaction_DeletionEnhancers_K_lm.csv",sep=""),row.names=FALSE)
|
|
write.csv(InteractionScores_deletion_suppressors_L_lm,paste(outDir,"ZScores_Interaction_DeletionSuppressors_L_lm.csv",sep=""),row.names=FALSE)
|
|
write.csv(InteractionScores_deletion_suppressors_K_lm,paste(outDir,"ZScores_Interaction_DeletionSuppressors_K_lm.csv",sep=""),row.names=FALSE)
|
|
write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L_lm,paste(outDir,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv",sep=""),row.names=FALSE)
|
|
write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K_lm,paste(outDir,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv",sep=""),row.names=FALSE)
|
|
|
|
|
|
write.csv(Labels,file=paste(StudyInfo),row.names = FALSE)
|
|
print('ln 1570 write StudyInfo.csv after ')
|
|
#write.table(Labels,file=paste("../Code/StudyInfo.txt"),sep="\t",row.names = FALSE)
|
|
|
|
for(i in 1:num_genes){
|
|
Gene_Sel <- unique(InteractionScores$OrfRep)[i]
|
|
X_ZCalculations <- X_stats_interaction_ALL[X_stats_interaction_ALL$OrfRep == Gene_Sel,]
|
|
X_Int_Scores <- InteractionScores[InteractionScores$OrfRep == Gene_Sel,]
|
|
|
|
p_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) +
|
|
coord_cartesian(ylim = c(-65,65)) +
|
|
geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) +
|
|
ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) +
|
|
annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) +
|
|
#annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_L,2))) +
|
|
annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_L,2))) +
|
|
annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) +
|
|
annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) +
|
|
annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) +
|
|
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) +
|
|
scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) +
|
|
theme_Publication()
|
|
|
|
p_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) +
|
|
coord_cartesian(ylim = c(-65,65)) +
|
|
geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) +
|
|
ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) +
|
|
annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) +
|
|
#annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) +
|
|
annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_K,2))) +
|
|
annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) +
|
|
annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) +
|
|
annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) +
|
|
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) +
|
|
scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) +
|
|
theme_Publication()
|
|
|
|
p_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) +
|
|
coord_cartesian(ylim = c(-0.65,0.65)) +
|
|
geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) +
|
|
ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) +
|
|
annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) +
|
|
#annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) +
|
|
annotate("text",x=1,y=0.25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_r,2))) +
|
|
annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) +
|
|
annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) +
|
|
annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) +
|
|
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) +
|
|
scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) +
|
|
theme_Publication()
|
|
|
|
p_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) +
|
|
coord_cartesian(ylim = c(-6500,6500)) +
|
|
geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) +
|
|
ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) +
|
|
annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) +
|
|
#annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) +
|
|
annotate("text",x=1,y=2500,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_AUC,2))) +
|
|
annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) +
|
|
annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) +
|
|
annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) +
|
|
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) +
|
|
scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) +
|
|
theme_Publication()
|
|
|
|
|
|
|
|
if(i == 1){
|
|
X_stats_interaction_ALL_final <- X_ZCalculations
|
|
}
|
|
if(i > 1){
|
|
X_stats_interaction_ALL_final <- rbind(X_stats_interaction_ALL_final,X_ZCalculations)
|
|
}
|
|
}
|
|
print("Pass Int ggplot loop")
|
|
write.csv(X_stats_interaction_ALL_final,paste(outDir,"ZScore_Calculations.csv",sep=""),row.names = FALSE)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Blank <- ggplot(X2_RF) + geom_blank()
|
|
|
|
pdf(paste(outDir,"InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE)
|
|
|
|
X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise,
|
|
mean_L = mean(l,na.rm=TRUE),
|
|
median_L = median(l,na.rm=TRUE),
|
|
max_L = max(l,na.rm=TRUE),
|
|
min_L = min(l,na.rm=TRUE),
|
|
sd_L = sd(l,na.rm=TRUE),
|
|
mean_K = mean(K,na.rm=TRUE),
|
|
median_K = median(K,na.rm=TRUE),
|
|
max_K = max(K,na.rm=TRUE),
|
|
min_K = min(K,na.rm=TRUE),
|
|
sd_K = sd(K,na.rm=TRUE),
|
|
mean_r = mean(r,na.rm=TRUE),
|
|
median_r = median(r,na.rm=TRUE),
|
|
max_r = max(r,na.rm=TRUE),
|
|
min_r = min(r,na.rm=TRUE),
|
|
sd_r = sd(r,na.rm=TRUE),
|
|
mean_AUC = mean(AUC,na.rm=TRUE),
|
|
median_AUC = median(AUC,na.rm=TRUE),
|
|
max_AUC = max(AUC,na.rm=TRUE),
|
|
min_AUC = min(AUC,na.rm=TRUE),
|
|
sd_AUC = sd(AUC,na.rm=TRUE),
|
|
NG = sum(NG,na.rm=TRUE),
|
|
DB = sum(DB,na.rm=TRUE),
|
|
SM = sum(SM,na.rm=TRUE)
|
|
)
|
|
|
|
|
|
L_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,l)) + geom_point(position="jitter",size=1) +
|
|
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
|
|
geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") +
|
|
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
|
|
ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) +
|
|
annotate("text",x=-0.25,y=10,label="NG") +
|
|
annotate("text",x=-0.25,y=5,label="DB") +
|
|
annotate("text",x=-0.25,y=0,label="SM") +
|
|
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10,label=X_stats_X2_RF$NG) +
|
|
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=5,label=X_stats_X2_RF$DB) +
|
|
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=0,label=X_stats_X2_RF$SM) +
|
|
theme_Publication()
|
|
|
|
K_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,K)) + geom_point(position="jitter",size=1) +
|
|
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
|
|
geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") +
|
|
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
|
|
ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(-20,160)) +
|
|
annotate("text",x=-0.25,y=-5,label="NG") +
|
|
annotate("text",x=-0.25,y=-12.5,label="DB") +
|
|
annotate("text",x=-0.25,y=-20,label="SM") +
|
|
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-5,label=X_stats_X2_RF$NG) +
|
|
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-12.5,label=X_stats_X2_RF$DB) +
|
|
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-20,label=X_stats_X2_RF$SM) +
|
|
theme_Publication()
|
|
|
|
R_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,r)) + geom_point(position="jitter",size=1) +
|
|
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
|
|
geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") +
|
|
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
|
|
ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) +
|
|
annotate("text",x=-0.25,y=.9,label="NG") +
|
|
annotate("text",x=-0.25,y=.8,label="DB") +
|
|
annotate("text",x=-0.25,y=.7,label="SM") +
|
|
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.9,label=X_stats_X2_RF$NG) +
|
|
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.8,label=X_stats_X2_RF$DB) +
|
|
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.7,label=X_stats_X2_RF$SM) +
|
|
theme_Publication()
|
|
|
|
AUC_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,AUC)) + geom_point(position="jitter",size=1) +
|
|
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
|
|
geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") +
|
|
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
|
|
ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) +
|
|
annotate("text",x=-0.25,y=11000,label="NG") +
|
|
annotate("text",x=-0.25,y=10000,label="DB") +
|
|
annotate("text",x=-0.25,y=9000,label="SM") +
|
|
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=11000,label=X_stats_X2_RF$NG) +
|
|
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10000,label=X_stats_X2_RF$DB) +
|
|
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=9000,label=X_stats_X2_RF$SM) +
|
|
theme_Publication()
|
|
|
|
|
|
L_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)) + geom_boxplot() +
|
|
scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
|
|
ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) +
|
|
theme_Publication()
|
|
|
|
K_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)) + geom_boxplot() +
|
|
scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
|
|
ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) +
|
|
theme_Publication()
|
|
|
|
r_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)) + geom_boxplot() +
|
|
scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
|
|
ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) +
|
|
theme_Publication()
|
|
|
|
AUC_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)) + geom_boxplot() +
|
|
scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
|
|
ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) +
|
|
theme_Publication()
|
|
|
|
|
|
grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2)
|
|
grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2)
|
|
|
|
|
|
|
|
#plot the references
|
|
#grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4)
|
|
#grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2)
|
|
|
|
#loop for grid arrange 4x3
|
|
j <- rep(1,((num_genes)/3)-1)
|
|
for(n in 1:length(j)){
|
|
j[n+1] <- n*3 + 1
|
|
}
|
|
#loop for printing each plot
|
|
num <- 0
|
|
for(m in 1:(round((num_genes)/3)-1)){
|
|
num <- j[m]
|
|
grid.arrange(p_l[[num]],p_K[[num]],p_r[[num]],p_AUC[[num]],p_l[[num+1]],p_K[[num+1]],p_r[[num+1]],p_AUC[[num+1]],p_l[[num+2]],p_K[[num+2]],p_r[[num+2]],p_AUC[[num+2]],ncol=4,nrow=3)
|
|
#grid.arrange(p_l[[364]],p_K[[364]],p_r[[364]],p_l[[365]],p_K[[365]],p_r[[365]],p_l[[366]],p_K[[366]],p_r[[366]],ncol=3,nrow=3)
|
|
|
|
|
|
#p1[[num+3]],p_K[[num+3]],p_r[[num+3]],p1[[num+4]],p_K[[num+4]],p_r[[num+4]]
|
|
}
|
|
if(num_genes != (num+2)){
|
|
total_num = num_genes - (num+2)
|
|
if(total_num == 5){
|
|
grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3)
|
|
grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_AUC[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],p_AUC[[num+7]],Blank,Blank,Blank,Blank,ncol=4,nrow=3)
|
|
}
|
|
if(total_num == 4){
|
|
grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3)
|
|
grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_AUC[[num+6]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3)
|
|
}
|
|
|
|
if(total_num == 3){
|
|
grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3)
|
|
#grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3)
|
|
}
|
|
|
|
if(total_num == 2){
|
|
grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],Blank,Blank,Blank,Blank,ncol=4,nrow=3)
|
|
#grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3)
|
|
}
|
|
|
|
if(total_num == 1){
|
|
grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3)
|
|
#grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3)
|
|
}
|
|
}
|
|
dev.off()
|
|
|
|
|
|
|
|
pdf(paste(outDir,"RF_InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE)
|
|
|
|
X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise,
|
|
mean_L = mean(l,na.rm=TRUE),
|
|
median_L = median(l,na.rm=TRUE),
|
|
max_L = max(l,na.rm=TRUE),
|
|
min_L = min(l,na.rm=TRUE),
|
|
sd_L = sd(l,na.rm=TRUE),
|
|
mean_K = mean(K,na.rm=TRUE),
|
|
median_K = median(K,na.rm=TRUE),
|
|
max_K = max(K,na.rm=TRUE),
|
|
min_K = min(K,na.rm=TRUE),
|
|
sd_K = sd(K,na.rm=TRUE),
|
|
mean_r = mean(r,na.rm=TRUE),
|
|
median_r = median(r,na.rm=TRUE),
|
|
max_r = max(r,na.rm=TRUE),
|
|
min_r = min(r,na.rm=TRUE),
|
|
sd_r = sd(r,na.rm=TRUE),
|
|
mean_AUC = mean(AUC,na.rm=TRUE),
|
|
median_AUC = median(AUC,na.rm=TRUE),
|
|
max_AUC = max(AUC,na.rm=TRUE),
|
|
min_AUC = min(AUC,na.rm=TRUE),
|
|
sd_AUC = sd(AUC,na.rm=TRUE),
|
|
NG = sum(NG,na.rm=TRUE),
|
|
DB = sum(DB,na.rm=TRUE),
|
|
SM = sum(SM,na.rm=TRUE)
|
|
)
|
|
|
|
|
|
L_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,l)) + geom_point(position="jitter",size=1) +
|
|
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
|
|
geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") +
|
|
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
|
|
ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) +
|
|
annotate("text",x=-0.25,y=10,label="NG") +
|
|
annotate("text",x=-0.25,y=5,label="DB") +
|
|
annotate("text",x=-0.25,y=0,label="SM") +
|
|
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10,label=X_stats_X2_RF$NG) +
|
|
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=5,label=X_stats_X2_RF$DB) +
|
|
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=0,label=X_stats_X2_RF$SM) +
|
|
theme_Publication()
|
|
|
|
K_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,K)) + geom_point(position="jitter",size=1) +
|
|
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
|
|
geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") +
|
|
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
|
|
ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(-20,160)) +
|
|
annotate("text",x=-0.25,y=-5,label="NG") +
|
|
annotate("text",x=-0.25,y=-12.5,label="DB") +
|
|
annotate("text",x=-0.25,y=-20,label="SM") +
|
|
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-5,label=X_stats_X2_RF$NG) +
|
|
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-12.5,label=X_stats_X2_RF$DB) +
|
|
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-20,label=X_stats_X2_RF$SM) +
|
|
theme_Publication()
|
|
|
|
R_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,r)) + geom_point(position="jitter",size=1) +
|
|
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
|
|
geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") +
|
|
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
|
|
ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) +
|
|
annotate("text",x=-0.25,y=.9,label="NG") +
|
|
annotate("text",x=-0.25,y=.8,label="DB") +
|
|
annotate("text",x=-0.25,y=.7,label="SM") +
|
|
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.9,label=X_stats_X2_RF$NG) +
|
|
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.8,label=X_stats_X2_RF$DB) +
|
|
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.7,label=X_stats_X2_RF$SM) +
|
|
theme_Publication()
|
|
|
|
AUC_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,AUC)) + geom_point(position="jitter",size=1) +
|
|
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
|
|
geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") +
|
|
scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
|
|
ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) +
|
|
annotate("text",x=-0.25,y=11000,label="NG") +
|
|
annotate("text",x=-0.25,y=10000,label="DB") +
|
|
annotate("text",x=-0.25,y=9000,label="SM") +
|
|
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=11000,label=X_stats_X2_RF$NG) +
|
|
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10000,label=X_stats_X2_RF$DB) +
|
|
annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=9000,label=X_stats_X2_RF$SM) +
|
|
theme_Publication()
|
|
|
|
|
|
L_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)) + geom_boxplot() +
|
|
scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
|
|
ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) +
|
|
theme_Publication()
|
|
|
|
K_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)) + geom_boxplot() +
|
|
scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
|
|
ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) +
|
|
theme_Publication()
|
|
|
|
r_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)) + geom_boxplot() +
|
|
scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
|
|
ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) +
|
|
theme_Publication()
|
|
|
|
AUC_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)) + geom_boxplot() +
|
|
scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
|
|
ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(12000,0)) +
|
|
theme_Publication()
|
|
|
|
|
|
grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2)
|
|
grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2)
|
|
|
|
|
|
|
|
#plot the references
|
|
#grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4)
|
|
#grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2)
|
|
|
|
#loop for grid arrange 4x3
|
|
j <- rep(1,((num_genes_RF)/3)-1)
|
|
for(n in 1:length(j)){
|
|
j[n+1] <- n*3 + 1
|
|
}
|
|
#loop for printing each plot
|
|
num <- 0
|
|
for(m in 1:(round((num_genes_RF)/3)-1)){
|
|
num <- j[m]
|
|
grid.arrange(p_rf_l[[num]],p_rf_K[[num]],p_rf_r[[num]],p_rf_AUC[[num]],p_rf_l[[num+1]],p_rf_K[[num+1]],p_rf_r[[num+1]],p_rf_AUC[[num+1]],p_rf_l[[num+2]],p_rf_K[[num+2]],p_rf_r[[num+2]],p_rf_AUC[[num+2]],ncol=4,nrow=3)
|
|
#grid.arrange(p_rf_l[[364]],p_rf_K[[364]],p_rf_r[[364]],p_rf_l[[365]],p_rf_K[[365]],p_rf_r[[365]],p_rf_l[[366]],p_rf_K[[366]],p_rf_r[[366]],ncol=3,nrow=3)
|
|
|
|
|
|
#p1[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p1[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]]
|
|
}
|
|
if(num_genes_RF != (num+2)){
|
|
total_num = num_genes_RF - (num+2)
|
|
if(total_num == 5){
|
|
grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3)
|
|
grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_AUC[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],p_rf_AUC[[num+7]],Blank,Blank,Blank,Blank,ncol=4,nrow=3)
|
|
}
|
|
if(total_num == 4){
|
|
grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3)
|
|
grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_AUC[[num+6]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3)
|
|
}
|
|
|
|
if(total_num == 3){
|
|
grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3)
|
|
#grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3)
|
|
}
|
|
|
|
if(total_num == 2){
|
|
grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],Blank,Blank,Blank,Blank,ncol=4,nrow=3)
|
|
#grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3)
|
|
}
|
|
|
|
if(total_num == 1){
|
|
grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3)
|
|
#grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3)
|
|
}
|
|
}
|
|
dev.off()
|
|
|
|
#print rank plots for L and K gene interactions
|
|
|
|
|
|
InteractionScores_AdjustMissing <- InteractionScores
|
|
InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_L),]$Avg_Zscore_L <- 0.001
|
|
InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_K),]$Avg_Zscore_K <- 0.001
|
|
InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_r),]$Avg_Zscore_r <- 0.001
|
|
InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_AUC),]$Avg_Zscore_AUC <- 0.001
|
|
|
|
InteractionScores_AdjustMissing$L_Rank <- NA
|
|
InteractionScores_AdjustMissing$K_Rank <- NA
|
|
InteractionScores_AdjustMissing$r_Rank <- NA
|
|
InteractionScores_AdjustMissing$AUC_Rank <- NA
|
|
|
|
InteractionScores_AdjustMissing$L_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_L)
|
|
InteractionScores_AdjustMissing$K_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_K)
|
|
InteractionScores_AdjustMissing$r_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_r)
|
|
InteractionScores_AdjustMissing$AUC_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_AUC)
|
|
|
|
#
|
|
InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_L),]$Z_lm_L <- 0.001
|
|
InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_K),]$Z_lm_K <- 0.001
|
|
InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_r),]$Z_lm_r <- 0.001
|
|
InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_AUC),]$Z_lm_AUC <- 0.001
|
|
|
|
InteractionScores_AdjustMissing$L_Rank_lm <- NA
|
|
InteractionScores_AdjustMissing$K_Rank_lm <- NA
|
|
InteractionScores_AdjustMissing$r_Rank_lm <- NA
|
|
InteractionScores_AdjustMissing$AUC_Rank_lm <- NA
|
|
|
|
InteractionScores_AdjustMissing$L_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_L)
|
|
InteractionScores_AdjustMissing$K_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_K)
|
|
InteractionScores_AdjustMissing$r_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_r)
|
|
InteractionScores_AdjustMissing$AUC_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_AUC)
|
|
|
|
|
|
|
|
Rank_L_1SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) +
|
|
ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
|
|
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 1,])[1])) +
|
|
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -1,])[1])) +
|
|
theme_Publication()
|
|
|
|
Rank_L_2SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) +
|
|
ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
|
|
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 2,])[1])) +
|
|
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -2,])[1])) +
|
|
theme_Publication()
|
|
|
|
Rank_L_3SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) +
|
|
ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
|
|
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 3,])[1])) +
|
|
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -3,])[1])) +
|
|
theme_Publication()
|
|
|
|
|
|
Rank_K_1SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) +
|
|
ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
|
|
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -1,])[1])) +
|
|
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 1,])[1])) +
|
|
theme_Publication()
|
|
|
|
Rank_K_2SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) +
|
|
ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
|
|
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -2,])[1])) +
|
|
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 2,])[1])) +
|
|
theme_Publication()
|
|
|
|
Rank_K_3SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) +
|
|
ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
|
|
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -3,])[1])) +
|
|
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 3,])[1])) +
|
|
theme_Publication()
|
|
|
|
|
|
Rank_L_1SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) +
|
|
ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
|
|
theme_Publication()
|
|
|
|
Rank_L_2SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) +
|
|
ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
|
|
theme_Publication()
|
|
|
|
Rank_L_3SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) +
|
|
ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
|
|
theme_Publication()
|
|
|
|
|
|
Rank_K_1SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) +
|
|
ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
|
|
theme_Publication()
|
|
|
|
Rank_K_2SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) +
|
|
ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
|
|
theme_Publication()
|
|
|
|
Rank_K_3SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) +
|
|
ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
|
|
theme_Publication()
|
|
|
|
|
|
pdf(paste(outDir,"RankPlots.pdf",sep=""),width = 18, height = 12, onefile = TRUE)
|
|
|
|
grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2)
|
|
grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2)
|
|
|
|
dev.off()
|
|
|
|
|
|
Rank_L_1SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) +
|
|
ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
|
|
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 1,])[1])) +
|
|
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -1,])[1])) +
|
|
theme_Publication()
|
|
|
|
Rank_L_2SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) +
|
|
ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
|
|
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 2,])[1])) +
|
|
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -2,])[1])) +
|
|
theme_Publication()
|
|
|
|
Rank_L_3SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) +
|
|
ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
|
|
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 3,])[1])) +
|
|
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -3,])[1])) +
|
|
theme_Publication()
|
|
|
|
|
|
Rank_K_1SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) +
|
|
ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
|
|
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -1,])[1])) +
|
|
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 1,])[1])) +
|
|
theme_Publication()
|
|
|
|
Rank_K_2SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) +
|
|
ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
|
|
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -2,])[1])) +
|
|
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 2,])[1])) +
|
|
theme_Publication()
|
|
|
|
Rank_K_3SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) +
|
|
ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
|
|
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -3,])[1])) +
|
|
annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 3,])[1])) +
|
|
theme_Publication()
|
|
|
|
|
|
Rank_L_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) +
|
|
ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
|
|
theme_Publication()
|
|
|
|
Rank_L_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) +
|
|
ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
|
|
theme_Publication()
|
|
|
|
Rank_L_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) +
|
|
ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
|
|
theme_Publication()
|
|
|
|
|
|
Rank_K_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) +
|
|
ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
|
|
theme_Publication()
|
|
|
|
Rank_K_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) +
|
|
ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
|
|
theme_Publication()
|
|
|
|
Rank_K_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) +
|
|
ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
|
|
theme_Publication()
|
|
|
|
pdf(paste(outDir,"RankPlots_lm.pdf",sep=""),width = 18, height = 12, onefile = TRUE)
|
|
|
|
grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2)
|
|
grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2)
|
|
|
|
dev.off()
|
|
|
|
|
|
|
|
X_NArm <- InteractionScores[!is.na(InteractionScores$Z_lm_L) | !is.na(InteractionScores$Avg_Zscore_L) ,]
|
|
|
|
#find overlaps
|
|
X_NArm$Overlap <- "No Effect"
|
|
try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Enhancer Both")
|
|
try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Suppressor Both")
|
|
try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= 2,]$Overlap <- "Deletion Enhancer lm only")
|
|
try(X_NArm[X_NArm$Z_lm_L <= 2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Enhancer Avg Zscore only")
|
|
try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= -2,]$Overlap <- "Deletion Suppressor lm only")
|
|
try(X_NArm[X_NArm$Z_lm_L >= -2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Suppressor Avg Zscore only")
|
|
try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Enhancer lm, Deletion Suppressor Avg Z score")
|
|
try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Suppressor lm, Deletion Enhancer Avg Z score")
|
|
|
|
#get the linear model info and the r-squared value for all CPPs in results 1 vs results 2
|
|
get_lm_L <- lm(X_NArm$Z_lm_L~X_NArm$Avg_Zscore_L)
|
|
L_lm <- summary(get_lm_L)
|
|
|
|
get_lm_K <- lm(X_NArm$Z_lm_K~X_NArm$Avg_Zscore_K)
|
|
K_lm <- summary(get_lm_K)
|
|
|
|
get_lm_r <- lm(X_NArm$Z_lm_r~X_NArm$Avg_Zscore_r)
|
|
r_lm <- summary(get_lm_r)
|
|
|
|
get_lm_AUC <- lm(X_NArm$Z_lm_AUC~X_NArm$Avg_Zscore_AUC)
|
|
AUC_lm <- summary(get_lm_AUC)
|
|
|
|
pdf(paste(outDir,"Avg_Zscore_vs_lm_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE)
|
|
|
|
print(ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) +
|
|
ggtitle("Avg Zscore vs lm L") +
|
|
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) +
|
|
annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right())
|
|
|
|
print(ggplot(X_NArm,aes(Avg_Zscore_K,Z_lm_K)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) +
|
|
ggtitle("Avg Zscore vs lm K") +
|
|
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) +
|
|
annotate("text",x=0,y=0,label = paste("R-squared = ",round(K_lm$r.squared,2))) + theme_Publication_legend_right())
|
|
|
|
print(ggplot(X_NArm,aes(Avg_Zscore_r,Z_lm_r)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) +
|
|
ggtitle("Avg Zscore vs lm r") +
|
|
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) +
|
|
annotate("text",x=0,y=0,label = paste("R-squared = ",round(r_lm$r.squared,2))) + theme_Publication_legend_right())
|
|
|
|
print(ggplot(X_NArm,aes(Avg_Zscore_AUC,Z_lm_AUC)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) +
|
|
ggtitle("Avg Zscore vs lm AUC") +
|
|
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) +
|
|
annotate("text",x=0,y=0,label = paste("R-squared = ",round(AUC_lm$r.squared,2))) + theme_Publication_legend_right())
|
|
|
|
dev.off()
|
|
|
|
|
|
lm_v_Zscore_L <- ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L,ORF=OrfRep,Gene=Gene,NG=NG,SM=SM,DB=DB)) + geom_point(aes(color=Overlap),shape=3) +
|
|
geom_smooth(method = "lm",color=1) + ggtitle("Avg Zscore vs lm L") +
|
|
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) +
|
|
annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right()
|
|
|
|
pgg <- ggplotly(lm_v_Zscore_L)
|
|
#pgg
|
|
plotly_path <- paste(outDir,"Avg_Zscore_vs_lm_NA_rm.html",sep="")
|
|
saveWidget(pgg, file=plotly_path, selfcontained =TRUE)
|
|
|
|
X_NArm$L_Rank <- rank(X_NArm$Avg_Zscore_L)
|
|
X_NArm$K_Rank <- rank(X_NArm$Avg_Zscore_K)
|
|
X_NArm$r_Rank <- rank(X_NArm$Avg_Zscore_r)
|
|
X_NArm$AUC_Rank <- rank(X_NArm$Avg_Zscore_AUC)
|
|
|
|
X_NArm$L_Rank_lm <- rank(X_NArm$Z_lm_L)
|
|
X_NArm$K_Rank_lm <- rank(X_NArm$Z_lm_K)
|
|
X_NArm$r_Rank_lm <- rank(X_NArm$Z_lm_r)
|
|
X_NArm$AUC_Rank_lm <- rank(X_NArm$Z_lm_AUC)
|
|
|
|
#get the linear model info and the r-squared value for all CPPs in results 1 vs results 2
|
|
get_lm_L2 <- lm(X_NArm$L_Rank_lm~X_NArm$L_Rank)
|
|
L_lm2 <- summary(get_lm_L2)
|
|
|
|
get_lm_K2 <- lm(X_NArm$K_Rank_lm~X_NArm$K_Rank)
|
|
K_lm2 <- summary(get_lm_K2)
|
|
|
|
get_lm_r2 <- lm(X_NArm$r_Rank_lm~X_NArm$r_Rank)
|
|
r_lm2 <- summary(get_lm_r2)
|
|
|
|
get_lm_AUC2 <- lm(X_NArm$AUC_Rank_lm~X_NArm$AUC_Rank)
|
|
AUC_lm2 <- summary(get_lm_AUC2)
|
|
|
|
num_genes_NArm2 <- (dim(X_NArm)[1])/2
|
|
|
|
pdf(paste(outDir,"Avg_Zscore_vs_lm_ranked_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE)
|
|
|
|
print(ggplot(X_NArm,aes(L_Rank,L_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) +
|
|
ggtitle("Rank Avg Zscore vs lm L") +
|
|
annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(L_lm2$r.squared,2))) + theme_Publication_legend_right())
|
|
|
|
print(ggplot(X_NArm,aes(K_Rank,K_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) +
|
|
ggtitle("Rank Avg Zscore vs lm K") +
|
|
annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(K_lm2$r.squared,2))) + theme_Publication_legend_right())
|
|
|
|
print(ggplot(X_NArm,aes(r_Rank,r_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) +
|
|
ggtitle("Rank Avg Zscore vs lm r") +
|
|
annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(r_lm2$r.squared,2))) + theme_Publication_legend_right())
|
|
|
|
print(ggplot(X_NArm,aes(AUC_Rank,AUC_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) +
|
|
ggtitle("Rank of Avg Zscore vs lm AUC") +
|
|
annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(AUC_lm2$r.squared,2))) + theme_Publication_legend_right())
|
|
|
|
|
|
|
|
dev.off()
|
|
|
|
|
|
|
|
Rank_L_1SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) +
|
|
ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
|
|
annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 1,])[1])) +
|
|
annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -1,])[1])) +
|
|
theme_Publication()
|
|
|
|
Rank_L_2SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) +
|
|
ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
|
|
annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 2,])[1])) +
|
|
annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -2,])[1])) +
|
|
theme_Publication()
|
|
|
|
Rank_L_3SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) +
|
|
ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
|
|
annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 3,])[1])) +
|
|
annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -3,])[1])) +
|
|
theme_Publication()
|
|
|
|
|
|
Rank_K_1SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) +
|
|
ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
|
|
annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -1,])[1])) +
|
|
annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 1,])[1])) +
|
|
theme_Publication()
|
|
|
|
Rank_K_2SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) +
|
|
ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
|
|
annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -2,])[1])) +
|
|
annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 2,])[1])) +
|
|
theme_Publication()
|
|
|
|
Rank_K_3SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) +
|
|
ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
|
|
annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -3,])[1])) +
|
|
annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 3,])[1])) +
|
|
theme_Publication()
|
|
|
|
|
|
Rank_L_1SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) +
|
|
ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
|
|
theme_Publication()
|
|
|
|
Rank_L_2SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) +
|
|
ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
|
|
theme_Publication()
|
|
|
|
Rank_L_3SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) +
|
|
ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
|
|
theme_Publication()
|
|
|
|
|
|
Rank_K_1SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) +
|
|
ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
|
|
theme_Publication()
|
|
|
|
Rank_K_2SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) +
|
|
ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
|
|
theme_Publication()
|
|
|
|
Rank_K_3SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) +
|
|
ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
|
|
theme_Publication()
|
|
|
|
|
|
pdf(paste(outDir,"RankPlots_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE)
|
|
|
|
grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2)
|
|
grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2)
|
|
|
|
dev.off()
|
|
|
|
|
|
Rank_L_1SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) +
|
|
ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
|
|
annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 1,])[1])) +
|
|
annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -1,])[1])) +
|
|
theme_Publication()
|
|
|
|
Rank_L_2SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) +
|
|
ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
|
|
annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 2,])[1])) +
|
|
annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -2,])[1])) +
|
|
theme_Publication()
|
|
|
|
Rank_L_3SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) +
|
|
ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
|
|
annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 3,])[1])) +
|
|
annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -3,])[1])) +
|
|
theme_Publication()
|
|
|
|
|
|
Rank_K_1SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) +
|
|
ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
|
|
annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -1,])[1])) +
|
|
annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 1,])[1])) +
|
|
theme_Publication()
|
|
|
|
Rank_K_2SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) +
|
|
ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
|
|
annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -2,])[1])) +
|
|
annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 2,])[1])) +
|
|
theme_Publication()
|
|
|
|
Rank_K_3SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) +
|
|
ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
|
|
annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -3,])[1])) +
|
|
annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 3,])[1])) +
|
|
theme_Publication()
|
|
|
|
|
|
Rank_L_1SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) +
|
|
ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
|
|
theme_Publication()
|
|
|
|
Rank_L_2SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) +
|
|
ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
|
|
theme_Publication()
|
|
|
|
Rank_L_3SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) +
|
|
ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
|
|
theme_Publication()
|
|
|
|
|
|
Rank_K_1SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) +
|
|
ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) +
|
|
theme_Publication()
|
|
|
|
Rank_K_2SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) +
|
|
ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) +
|
|
theme_Publication()
|
|
|
|
Rank_K_3SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) +
|
|
ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
|
|
annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
|
|
geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
|
|
theme_Publication()
|
|
|
|
pdf(paste(outDir,"RankPlots_lm_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE)
|
|
|
|
grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2)
|
|
grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2)
|
|
|
|
dev.off()
|
|
|
|
}
|
|
|
|
|
|
|
|
#get the linear model info and the r-squared value for all CPPs in results 1 vs results 2
|
|
get_lm_1 <- lm(X_NArm$Z_lm_K~X_NArm$Z_lm_L)
|
|
L_lm_1 <- summary(get_lm_1)
|
|
|
|
get_lm_2 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_L)
|
|
L_lm_2 <- summary(get_lm_2)
|
|
|
|
get_lm_3 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_L)
|
|
L_lm_3 <- summary(get_lm_3)
|
|
|
|
get_lm_4 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_K)
|
|
L_lm_4 <- summary(get_lm_4)
|
|
|
|
get_lm_5 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_K)
|
|
L_lm_5 <- summary(get_lm_5)
|
|
|
|
get_lm_6 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_r)
|
|
L_lm_6 <- summary(get_lm_6)
|
|
|
|
|
|
pdf(file=paste(outDir,"Correlation_CPPs.pdf",sep=""),width = 10, height = 7, onefile = TRUE)
|
|
|
|
ggplot(X_NArm,aes(Z_lm_L,Z_lm_K)) + geom_point(shape=3,color="gray70") +
|
|
geom_smooth(method="lm",color="tomato3") +
|
|
ggtitle("Interaction L vs. Interaction K") +
|
|
xlab("z-score L") + ylab("z-score K") +
|
|
annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_1$r.squared,3))) +
|
|
theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18))
|
|
|
|
ggplot(X_NArm,aes(Z_lm_L,Z_lm_r)) + geom_point(shape=3,color="gray70") +
|
|
geom_smooth(method="lm",color="tomato3") +
|
|
ggtitle("Interaction L vs. Interaction r") +
|
|
xlab("z-score L") + ylab("z-score r") +
|
|
annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_2$r.squared,3))) +
|
|
theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18))
|
|
|
|
ggplot(X_NArm,aes(Z_lm_L,Z_lm_AUC)) + geom_point(shape=3,color="gray70") +
|
|
geom_smooth(method="lm",color="tomato3") +
|
|
ggtitle("Interaction L vs. Interaction AUC") +
|
|
xlab("z-score L") + ylab("z-score AUC") +
|
|
annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_3$r.squared,3))) +
|
|
theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18))
|
|
|
|
ggplot(X_NArm,aes(Z_lm_K,Z_lm_r)) + geom_point(shape=3,color="gray70") +
|
|
geom_smooth(method="lm",color="tomato3") +
|
|
ggtitle("Interaction K vs. Interaction r") +
|
|
xlab("z-score K") + ylab("z-score r") +
|
|
annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_4$r.squared,3))) +
|
|
theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18))
|
|
|
|
ggplot(X_NArm,aes(Z_lm_K,Z_lm_AUC)) + geom_point(shape=3,color="gray70") +
|
|
geom_smooth(method="lm",color="tomato3") +
|
|
ggtitle("Interaction K vs. Interaction AUC") +
|
|
xlab("z-score K") + ylab("z-score AUC") +
|
|
annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_5$r.squared,3))) +
|
|
theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18))
|
|
|
|
ggplot(X_NArm,aes(Z_lm_r,Z_lm_AUC)) + geom_point(shape=3,color="gray70") +
|
|
geom_smooth(method="lm",color="tomato3") +
|
|
ggtitle("Interaction r vs. Interaction AUC") +
|
|
xlab("z-score r") + ylab("z-score AUC") +
|
|
annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_6$r.squared,3))) +
|
|
theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18))
|
|
|
|
InteractionScores_RF2 <- InteractionScores_RF[!is.na(InteractionScores_RF$Z_lm_L),]
|
|
ggplot(X_NArm,aes(Z_lm_L,Z_lm_K)) + geom_point(shape=3,color="gray70") +
|
|
geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_K),color="cyan") +
|
|
ggtitle("Interaction L vs. Interaction K") +
|
|
xlab("z-score L") + ylab("z-score K") +
|
|
annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_1$r.squared,3))) +
|
|
theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18))
|
|
|
|
ggplot(X_NArm,aes(Z_lm_L,Z_lm_r)) + geom_point(shape=3,color="gray70") +
|
|
geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_r),color="cyan") +
|
|
ggtitle("Interaction L vs. Interaction r") +
|
|
xlab("z-score L") + ylab("z-score r") +
|
|
annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_2$r.squared,3))) +
|
|
theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18))
|
|
|
|
ggplot(X_NArm,aes(Z_lm_L,Z_lm_AUC)) + geom_point(shape=3,color="gray70") +
|
|
geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_AUC),color="cyan") +
|
|
ggtitle("Interaction L vs. Interaction AUC") +
|
|
xlab("z-score L") + ylab("z-score AUC") +
|
|
annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_3$r.squared,3))) +
|
|
theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18))
|
|
|
|
ggplot(X_NArm,aes(Z_lm_K,Z_lm_r)) + geom_point(shape=3,color="gray70") +
|
|
geom_point(data=InteractionScores_RF2,aes(Z_lm_K,Z_lm_r),color="cyan") +
|
|
ggtitle("Interaction K vs. Interaction r") +
|
|
xlab("z-score K") + ylab("z-score r") +
|
|
annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_4$r.squared,3))) +
|
|
theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18))
|
|
|
|
ggplot(X_NArm,aes(Z_lm_K,Z_lm_AUC)) + geom_point(shape=3,color="gray70") +
|
|
geom_point(data=InteractionScores_RF2,aes(Z_lm_K,Z_lm_AUC),color="cyan") +
|
|
ggtitle("Interaction K vs. Interaction AUC") +
|
|
xlab("z-score K") + ylab("z-score AUC") +
|
|
annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_5$r.squared,3))) +
|
|
theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18))
|
|
|
|
ggplot(X_NArm,aes(Z_lm_r,Z_lm_AUC)) + geom_point(shape=3,color="gray70") +
|
|
geom_point(data=InteractionScores_RF2,aes(Z_lm_r,Z_lm_AUC),color="cyan") +
|
|
ggtitle("Interaction r vs. Interaction AUC") +
|
|
xlab("z-score r") + ylab("z-score AUC") +
|
|
annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_6$r.squared,3))) +
|
|
theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18))
|
|
|
|
|
|
dev.off()
|
|
|
|
#write.csv(Labels,file=paste("../Code/Parameters.csv"),row.names = FALSE)
|
|
timestamp()
|