Files
hartman-server/workflow/apps/r/interactions.R
2024-08-10 09:54:01 -04:00

3487 lines
168 KiB
R

#!/usr/bin/env Rscript
# NOTE: The script now has 6 optional arguments:
# 1. Path to input easy results file
# 2. /output/ directory
# 3. Path to StudyInfo.csv
# 4. Path to SGDgeneList
# 5. Standard deviation value
# 6. The experiment number (Exp# directory)
library(ggplot2)
library(plyr)
library(extrafont)
library(gridExtra)
library(gplots)
library(RColorBrewer)
library(stringr)
library(gdata)
library(plotly)
library(htmlwidgets)
# Parse arguments
args <- commandArgs(TRUE)
inputFile <- args[1]
# Set output dir
if (length(args) >= 2) {
outDir <- args[2]
} else {
outDir <- "/ZScores/" # for legacy workflow
}
# Set StudyInfo file path
if (length(args) >= 3) {
studyInfo <- args[3]
} else {
studyInfo <- "../Code/StudyInfo.csv" # for legacy workflow
}
# Set SGDgeneList file path
if (length(args) >= 4) {
SGDgeneList <- args[4]
} else {
SGDgeneList <- "../Code/SGD_features.tab" # for legacy workflow
}
# Set standard deviation
if (length(args) >= 5) {
delBGFactor <- args[5]
} else {
# User prompt for std multiplier Value
print("Enter a Standard Deviation value for noise filter")
print("Sean Santos recommends 3 or 5")
delBGFactor <- readLines(file("stdin"), n = 1L)
}
delBGFactor <- as.numeric(delBGFactor)
if (is.na(delBGFactor)) {
delBGFactor <- 3 # Recommended by Sean
}
print(paste("The Standard Deviation Value is:", delBGFactor))
# Set experiment #
if (length(args) >= 6) {
expNumber <- args[6]
} else {
# User prompt for std multiplier Value
print("Enter the experiment number (Exp# directory)")
expNumber <- readLines(file("stdin"), n = 1L)
}
expNumber <- as.numeric(expNumber)
outDir_QC <- paste(outDir, "QC/", sep = "")
if (!file.exists(outDir)) {
dir.create(file.path(outDir))
}
if (!file.exists(outDir_QC)) {
dir.create(file.path(outDir_QC))
}
# Capture Exp_ number, use it to Save args[2]{std}to Labels field and then
# write to Labels to studyInfo.txt for future reference
Labels <- read.csv(file = studyInfo, stringsAsFactors = FALSE) # sep = ","
Labels[expNumber, 3] <- delBGFactor
write.csv(Labels, file = studyInfo, row.names = FALSE)
# Begin User Data Selection Section
# Read in the data
X <- read.delim(inputFile, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
X <- X[!(X[[1]] %in% c("", "Scan")), ]
# X <- X[!(X[[1]]%in%c(61:76)), ] #Remove dAmp plates which are Scans 61 thru 76
# X <- X[which(X$Specifics == "WT"), ]
# X_length <- length(X[1, ])
# X_end <- length(X[1, ]) - 2
# X <- X[, c(1:42, X_end:X_length)]
# Use numeric data to perform operations
X$Col <- as.numeric(X$Col)
X$Row <- as.numeric(X$Row)
X$l <- as.numeric(X$l)
X$K <- as.numeric(X$K)
X$r <- as.numeric(X$r)
X$Scan <- as.numeric(X$Scan)
X$AUC <- as.numeric(X$AUC)
X$LstBackgrd <- as.numeric(X$LstBackgrd)
X$X1stBackgrd <- as.numeric(X$X1stBackgrd)
# Sometimes the non-varying drug is in the 'Drug' col vs the 'Modifier1' col
# as was the case in Gemcitabin and Cytarabin experiments.
# The following allows user to rename columns so as to get the appropriate
# data where it needs to be for the script to run properly.
# colnames(X)[7] <- "Modifier1"
# colnames(X)[8] <- "Conc1"
# colnames(X)[10] <- "Drug"
# colnames(X)[11] <- "Conc"
# Set the OrfRep to YDL227C for the ref data
X[X$ORF == "YDL227C", ]$OrfRep <- "YDL227C"
# Sean removes the Doxycyclin at 0.0ug.mL so that only the Oligomycin series with Doxycyclin of 0.12ug/mL are used.
# That is the first DM plates are removed from the data set with the following.
# This removes data with dox == 0 leaving gene expression on with four different concentrations of Gemcytabin
# X <- X[X$Conc1 != "0ug/ml", ]
X <- X[X$Drug != "BMH21", ] # this removes data concerning BMH21 for this experiment
# Mert placed the "bad_spot" text in the ORF col. for particular spots in the RF1 and RF2 plates.
# This code removes those spots from the data set used for the interaction analysis.
# Dr.Hartman feels that these donot effect Zscores significantly and so "non-curated" files were used.
# try(X <- X[X$ORF != "bad_spot", ])
# Get total number of drug concentrations
total_conc_nums <- length(unique(X$Conc))
# Function to ID numbers in string with characters+numbers (ie to get numeric drug conc)
numextract <- function(string) {
str_extract(string, "\\-*\\d+\\.*\\d*")
}
# Generate a new column with the numeric drug concs
X$Conc_Num <- as.numeric(numextract(X$Conc))
# Generate new column with the numeric drug concs as factors starting at 0 for the graphing later
X$Conc_Num_Factor <- as.numeric(as.factor(X$Conc_Num)) - 1
# Get the max factor for concentration
MAX_CONC <- max(X$Conc_Num_Factor)
# If treating numbers not as factors uncomment next line and comment out previous line
# MAX_CONC <- max(X$Conc_Num)
# Remove wells with problems for making graphs and to not include in summary statistics
X <- X[X$Gene != "BLANK", ]
X <- X[X$Gene != "Blank", ]
X <- X[X$ORF != "Blank", ]
X <- X[X$Gene != "blank", ]
# X <- X[X$Gene != "HO", ]
Xbu <- X
# Use SGDgenelist to update orfs and replace empty geneName cells with ORF name (adapted from Sean's Merge script).
# This is to 'fix' the naming for everything that follows (REMc, Heatmaps ... et.al) rather than do it piece meal later
# Sean's Match Script( which was adapted here) was fixed 2022_0608 so as not to overwrite the RF1&RF2 geneNames
# in the Z_lm_L, K, r&AUC output values. Values correlated well but were off by a multiplier factor.
genes <- data.frame(read.delim(
file = SGDgeneList, quote = "", header = FALSE, colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11))))
for (i in 1:length(X[, 14])) {
ii <- as.numeric(i)
line_num <- match(X[ii, 14], genes[, 1], nomatch = 1)
OrfRepColNum <- as.numeric(match("OrfRep", names(X)))
if (X[ii, OrfRepColNum] != "YDL227C") {
X[ii, 15] <- genes[line_num, 2]
}
if ((X[ii, 15] == "") || (X[ii, 15] == "OCT1")) {
X[ii, 15] <- X[ii, OrfRepColNum]
}
}
Xblankreplace <- X
# X = Xbu # for restore testing restore X if geneName 'Match' routine needs changing
# Remove dAmPs
# jlh confirmed to leave dAmps in so comment out this section
# DAmPs_List <- "../Code/22_0602_Remy_DAmPsList.txt"
# Damps <- read.delim(DAmPs_List, header = F)
# X <- X[!(X$ORF %in% Damps$V1), ] # fix this to Damps[, 1]
# XafterDampsRM = X # backup for debugging
# Begin Graphics Boiler Plate Section
# theme elements for plots
theme_publication <- function(base_size = 14, base_family = "sans") {
library(grid)
library(ggthemes)
(theme_foundation(base_size = base_size, base_family = base_family) +
theme(plot.title = element_text(
face = "bold",
size = rel(1.2), hjust = 0.5),
text = element_text(),
panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold", size = rel(1)),
axis.title.y = element_text(angle = 90, vjust = 2),
axis.title.x = element_text(vjust = -0.2),
axis.text = element_text(),
axis.line = element_line(colour = "black"),
axis.ticks = element_line(),
panel.grid.major = element_line(colour = "#f0f0f0"),
panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA),
legend.position = "bottom",
legend.direction = "horizontal",
legend.key.size = unit(0.2, "cm"),
legend.spacing = unit(0, "cm"),
legend.title = element_text(face = "italic"),
plot.margin = unit(c(10, 5, 5, 5), "mm"),
strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
strip.text = element_text(face = "bold")
)
)
}
scale_fill_publication <- function(...) {
library(scales)
discrete_scale("fill", "Publication", manual_pal(
values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...)
}
scale_colour_Publication <- function(...) {
discrete_scale("colour", "Publication", manual_pal(
values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...)
}
theme_publication_legend_right <- function(base_size = 14, base_family = "sans") {
(theme_foundation(base_size = base_size, base_family = base_family) +
theme(plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
text = element_text(),
panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold", size = rel(1)),
axis.title.y = element_text(angle = 90, vjust = 2),
axis.title.x = element_text(vjust = -0.2),
axis.text = element_text(),
axis.line = element_line(colour = "black"),
axis.ticks = element_line(),
panel.grid.major = element_line(colour = "#f0f0f0"),
panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA),
legend.position = "right",
legend.direction = "vertical",
legend.key.size = unit(0.5, "cm"),
legend.spacing = unit(0, "cm"),
legend.title = element_text(face = "italic"),
plot.margin = unit(c(10, 5, 5, 5), "mm"),
strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
strip.text = element_text(face = "bold")
)
)
}
scale_fill_publication <- function(...) {
discrete_scale("fill", "Publication", manual_pal(
values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...)
}
scale_colour_Publication <- function(...) {
discrete_scale("colour", "Publication", manual_pal(
values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...)
}
# Print timestamp for initial time the code starts
timestamp()
# Begin QC Section
# Part 2 - Quality control
# Print quality control graphs for each dataset before removing contaminated data
# and before adjusting missing data to max theoretical values
# Plate analysis plot
# Plate analysis is a quality check to identify plate effects containing anomalies
Plate_Analysis_L <-
ggplot(X, aes(Scan, l, color = as.factor(Conc_Num))) +
geom_point(shape = 3, size = 0.2) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(fun = mean, geom = "point", size = 0.6) +
ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_publication()
Plate_Analysis_K <-
ggplot(X, aes(Scan, K, color = as.factor(Conc_Num))) +
geom_point(shape = 3, size = 0.2) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(fun = mean, geom = "point", size = 0.6) +
ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_publication()
Plate_Analysis_r <-
ggplot(X, aes(Scan, r, color = as.factor(Conc_Num))) +
geom_point(shape = 3, size = 0.2) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(fun = mean, geom = "point", size = 0.6) +
ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_publication()
Plate_Analysis_AUC <-
ggplot(X, aes(Scan, AUC, color = as.factor(Conc_Num))) +
geom_point(shape = 3, size = 0.2) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(fun = mean, geom = "point", size = 0.6) +
ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_publication()
Plate_Analysis_L_Box <-
ggplot(X, aes(as.factor(Scan), l, color = as.factor(Conc_Num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for L before quality control") +
theme_publication()
Plate_Analysis_K_Box <-
ggplot(X, aes(as.factor(Scan), K, color = as.factor(Conc_Num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for K before quality control") +
theme_publication()
Plate_Analysis_r_Box <-
ggplot(X, aes(as.factor(Scan), r, color = as.factor(Conc_Num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for r before quality control") +
theme_publication()
Plate_Analysis_AUC_Box <-
ggplot(X, aes(as.factor(Scan), AUC, color = as.factor(Conc_Num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for AUC before quality control") +
theme_publication()
# Quality control - values with a high delta background likely have heavy contamination
# Check the frequency of these values
# Report the L and K values of these spots
# Report the number to be removed based on the Delta_Background_Tolerance
X$Delta_Backgrd <- X$LstBackgrd - X$X1stBackgrd
# Raw l vs K before QC
Raw_l_vs_K_beforeQC <-
ggplot(X, aes(l, K, color = as.factor(Conc_Num))) +
geom_point(aes(ORF = ORF, Gene = Gene, Delta_Backgrd = Delta_Backgrd), shape = 3) +
ggtitle("Raw L vs K before QC") +
theme_publication_legend_right()
pdf(paste(outDir_QC, "Raw_L_vs_K_beforeQC.pdf", sep = ""), width = 12, height = 8)
Raw_l_vs_K_beforeQC
dev.off()
pgg <- ggplotly(Raw_l_vs_K_beforeQC)
plotly_path <- paste(outDir_QC, "Raw_L_vs_K_beforeQC.html", sep = "")
saveWidget(pgg, file = plotly_path, selfcontained = TRUE)
# Set delta background tolerance based on 3 sds from the mean delta background
Delta_Background_Tolerance <- mean(X$Delta_Backgrd) + (delBGFactor * sd(X$Delta_Backgrd))
# Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(3*sd(X$Delta_Backgrd))
print(paste("Delta_Background_Tolerance is", Delta_Background_Tolerance, sep = " "))
Plate_Analysis_Delta_Backgrd <-
ggplot(X, aes(Scan, Delta_Backgrd, color = as.factor(Conc_Num))) +
geom_point(shape = 3, size = 0.2, position = "jitter") +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(fun = mean, geom = "point", size = 0.6) +
ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") +
theme_publication()
Plate_Analysis_Delta_Backgrd_Box <-
ggplot(X, aes(as.factor(Scan), Delta_Backgrd, color = as.factor(Conc_Num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") +
theme_publication()
X_Delta_Backgrd_above_Tolerance <-
X[X$Delta_Backgrd >= Delta_Background_Tolerance, ]
X_Delta_Backgrd_above_Tolerance_K_halfmedian <-
(median(X_Delta_Backgrd_above_Tolerance$K, na.rm = TRUE)) / 2
X_Delta_Backgrd_above_Tolerance_L_halfmedian <-
(median(X_Delta_Backgrd_above_Tolerance$l, na.rm = TRUE)) / 2
X_Delta_Backgrd_above_Tolerance_toRemove <-
dim(X_Delta_Backgrd_above_Tolerance)[1]
X_Delta_Backgrd_above_Tolerance_L_vs_K <-
ggplot(X_Delta_Backgrd_above_Tolerance, aes(l, K, color = as.factor(Conc_Num))) +
geom_point(aes(ORF = ORF, Gene = Gene, Delta_Backgrd = Delta_Backgrd), shape = 3) +
ggtitle(paste("Raw L vs K for strains above delta background threshold of", Delta_Background_Tolerance, "or above")) +
annotate("text", x = X_Delta_Backgrd_above_Tolerance_L_halfmedian, y = X_Delta_Backgrd_above_Tolerance_K_halfmedian,
label = paste("Strains above delta background tolerance = ", X_Delta_Backgrd_above_Tolerance_toRemove)
) +
theme_publication_legend_right()
pdf(paste(outDir_QC, "Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.pdf", sep = ""), width = 12, height = 8)
X_Delta_Backgrd_above_Tolerance_L_vs_K
dev.off()
pgg <- ggplotly(X_Delta_Backgrd_above_Tolerance_L_vs_K)
plotly_path <- paste(outDir_QC, "Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.html", sep = "")
saveWidget(pgg, file = plotly_path, selfcontained = TRUE)
# Frequency plot for all data vs. the delta_background
DeltaBackground_Frequency_Plot <- ggplot(X, aes(Delta_Backgrd, color = as.factor(Conc_Num))) + geom_density() +
ggtitle("Density plot for Delta Background by Conc All Data") + theme_publication_legend_right()
# Bar plot for all data vs. the delta_background
DeltaBackground_Bar_Plot <- ggplot(X, aes(Delta_Backgrd, color = as.factor(Conc_Num))) + geom_bar() +
ggtitle("Bar plot for Delta Background by Conc All Data") + theme_publication_legend_right()
pdf(file = paste(outDir_QC, "Frequency_Delta_Background.pdf", sep = ""), width = 12, height = 8)
print(DeltaBackground_Frequency_Plot)
print(DeltaBackground_Bar_Plot)
dev.off()
# Need to identify missing data, and differentiate between this data and removed data
# so the removed data can get set to NA and the missing data can get set to max theoretical values
# 1 for missing data, 0 for non missing data
# Use "NG" for NoGrowth rather than "missing"
X$NG <- 0
try(X[X$l == 0 & !is.na(X$l), ]$NG <- 1)
# 1 for removed data, 0 non removed data
# Use DB to identify number of genes removed due to the DeltaBackground Threshold rather than "Removed"
X$DB <- 0
try(X[X$Delta_Backgrd >= Delta_Background_Tolerance, ]$DB <- 1)
# Replace the CPPs for l, r, AUC and K (must be last!) for removed data
try(X[X$Delta_Backgrd >= Delta_Background_Tolerance, ]$l <- NA)
try(X[X$Delta_Backgrd >= Delta_Background_Tolerance, ]$r <- NA)
try(X[X$Delta_Backgrd >= Delta_Background_Tolerance, ]$AUC <- NA)
try(X[X$Delta_Backgrd >= Delta_Background_Tolerance, ]$K <- NA)
# QC Plots
Plate_Analysis_L_afterQC <- ggplot(X, aes(Scan, l, color = as.factor(Conc_Num))) + geom_point(shape = 3, size = 0.2) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(
fun = mean,
geom = "point",
size = 0.6
) +
ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_publication()
Plate_Analysis_K_afterQC <-
ggplot(X, aes(Scan, K, color = as.factor(Conc_Num))) +
geom_point(shape = 3, size = 0.2) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(
fun = mean,
geom = "point",
size = 0.6
) +
ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_publication()
Plate_Analysis_r_afterQC <-
ggplot(X, aes(Scan, r, color = as.factor(Conc_Num))) +
geom_point(shape = 3, size = 0.2) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(
fun = mean,
geom = "point",
size = 0.6
) +
ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_publication()
Plate_Analysis_AUC_afterQC <-
ggplot(X, aes(Scan, AUC, color = as.factor(Conc_Num))) +
geom_point(shape = 3, size = 0.2) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(
fun = mean,
geom = "point",
size = 0.6
) +
ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_publication()
Plate_Analysis_Delta_Backgrd_afterQC <-
ggplot(X, aes(Scan, Delta_Backgrd, color = as.factor(Conc_Num))) +
geom_point(shape = 3, size = 0.2) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(
fun = mean,
geom = "point",
size = 0.6
) +
ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") +
theme_publication()
Plate_Analysis_L_Box_afterQC <-
ggplot(X, aes(as.factor(Scan), l, color = as.factor(Conc_Num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for L after quality control") +
theme_publication()
Plate_Analysis_K_Box_afterQC <-
ggplot(X, aes(as.factor(Scan), K, color = as.factor(Conc_Num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for K after quality control") +
theme_publication()
Plate_Analysis_r_Box_afterQC <-
ggplot(X, aes(as.factor(Scan), r, color = as.factor(Conc_Num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for r after quality control") +
theme_publication()
Plate_Analysis_AUC_Box_afterQC <-
ggplot(X, aes(as.factor(Scan), AUC, color = as.factor(Conc_Num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for AUC after quality control") +
theme_publication()
Plate_Analysis_Delta_Backgrd_Box_afterQC <-
ggplot(X, aes(as.factor(Scan), Delta_Backgrd, color = as.factor(Conc_Num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") +
theme_publication()
# Print the plate analysis data before and after QC
pdf(file = paste(outDir_QC, "Plate_Analysis.pdf", sep = ""), width = 14, height = 9)
Plate_Analysis_L
Plate_Analysis_L_afterQC
Plate_Analysis_K
Plate_Analysis_K_afterQC
Plate_Analysis_r
Plate_Analysis_r_afterQC
Plate_Analysis_AUC
Plate_Analysis_AUC_afterQC
Plate_Analysis_Delta_Backgrd
Plate_Analysis_Delta_Backgrd_afterQC
dev.off()
# Print the plate analysis data before and after QC
pdf(file = paste(outDir_QC, "Plate_Analysis_Boxplots.pdf", sep = ""), width = 18, height = 9)
Plate_Analysis_L_Box
Plate_Analysis_L_Box_afterQC
Plate_Analysis_K_Box
Plate_Analysis_K_Box_afterQC
Plate_Analysis_r_Box
Plate_Analysis_r_Box_afterQC
Plate_Analysis_AUC_Box
Plate_Analysis_AUC_Box_afterQC
Plate_Analysis_Delta_Backgrd_Box
Plate_Analysis_Delta_Backgrd_Box_afterQC
dev.off()
# Remove the zero values and print plate analysis
X_noZero <- X[which(X$l > 0), ]
Plate_Analysis_L_afterQC_Z <-
ggplot(X_noZero, aes(Scan, l, color = as.factor(Conc_Num))) +
geom_point(shape = 3, size = 0.2) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(
fun = mean,
geom = "point",
size = 0.6
) +
ggtitle("Plate analysis by Drug Conc for L after quality control") +
theme_publication()
Plate_Analysis_K_afterQC_Z <-
ggplot(X_noZero, aes(Scan, K, color = as.factor(Conc_Num))) +
geom_point(shape = 3, size = 0.2) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(
fun = mean,
geom = "point",
size = 0.6
) +
ggtitle("Plate analysis by Drug Conc for K after quality control") +
theme_publication()
Plate_Analysis_r_afterQC_Z <-
ggplot(X_noZero, aes(Scan, r, color = as.factor(Conc_Num))) +
geom_point(shape = 3, size = 0.2) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(
fun = mean,
geom = "point",
size = 0.6
) +
ggtitle("Plate analysis by Drug Conc for r after quality control") +
theme_publication()
Plate_Analysis_AUC_afterQC_Z <-
ggplot(X_noZero, aes(Scan, AUC, color = as.factor(Conc_Num))) +
geom_point(shape = 3, size = 0.2) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(
fun = mean,
geom = "point",
size = 0.6
) +
ggtitle("Plate analysis by Drug Conc for AUC after quality control") +
theme_publication()
Plate_Analysis_Delta_Backgrd_afterQC_Z <-
ggplot(X_noZero, aes(Scan, Delta_Backgrd, color = as.factor(Conc_Num))) +
geom_point(shape = 3, size = 0.2) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(
fun = mean,
geom = "point",
size = 0.6
) +
ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") +
theme_publication()
Plate_Analysis_L_Box_afterQC_Z <-
ggplot(X_noZero, aes(as.factor(Scan), l, color = as.factor(Conc_Num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for L after quality control") +
theme_publication()
Plate_Analysis_K_Box_afterQC_Z <-
ggplot(X_noZero, aes(as.factor(Scan), K, color = as.factor(Conc_Num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for K after quality control") +
theme_publication()
Plate_Analysis_r_Box_afterQC_Z <-
ggplot(X_noZero, aes(as.factor(Scan), r, color = as.factor(Conc_Num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for r after quality control") +
theme_publication()
Plate_Analysis_AUC_Box_afterQC_Z <-
ggplot(X_noZero, aes(as.factor(Scan), AUC, color = as.factor(Conc_Num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for AUC after quality control") +
theme_publication()
Plate_Analysis_Delta_Backgrd_Box_afterQC_Z <-
ggplot(X_noZero, aes(as.factor(Scan), Delta_Backgrd, color = as.factor(Conc_Num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") +
theme_publication()
# Print the plate analysis data before and after QC
pdf(file = paste(outDir_QC, "Plate_Analysis_noZeros.pdf", sep = ""), width = 14, height = 9)
Plate_Analysis_L_afterQC_Z
Plate_Analysis_K_afterQC_Z
Plate_Analysis_r_afterQC_Z
Plate_Analysis_AUC_afterQC_Z
Plate_Analysis_Delta_Backgrd_afterQC_Z
dev.off()
# Print the plate analysis data before and after QC
pdf(file = paste(outDir_QC, "Plate_Analysis_noZeros_Boxplots.pdf", sep = ""), width = 18, height = 9)
Plate_Analysis_L_Box_afterQC_Z
Plate_Analysis_K_Box_afterQC_Z
Plate_Analysis_r_Box_afterQC_Z
Plate_Analysis_AUC_Box_afterQC_Z
Plate_Analysis_Delta_Backgrd_Box_afterQC_Z
dev.off()
# Remove dataset with zeros removed
rm(X_noZero)
# X_test_missing_and_removed <- X[X$Removed == 1, ]
# Calculate summary statistics for all strains, including both background and the deletions
X_stats_ALL <- ddply(
X,
c("Conc_Num", "Conc_Num_Factor"),
summarise,
N = (length(l)),
mean_L = mean(l, na.rm = TRUE),
median_L = median(l, na.rm = TRUE),
max_L = max(l, na.rm = TRUE),
min_L = min(l, na.rm = TRUE),
sd_L = sd(l, na.rm = TRUE),
se_L = sd_L / sqrt(N - 1),
mean_K = mean(K, na.rm = TRUE),
median_K = median(K, na.rm = TRUE),
max_K = max(K, na.rm = TRUE),
min_K = min(K, na.rm = TRUE),
sd_K = sd(K, na.rm = TRUE),
se_K = sd_K / sqrt(N - 1),
mean_r = mean(r, na.rm = TRUE),
median_r = median(r, na.rm = TRUE),
max_r = max(r, na.rm = TRUE),
min_r = min(r, na.rm = TRUE),
sd_r = sd(r, na.rm = TRUE),
se_r = sd_r / sqrt(N - 1),
mean_AUC = mean(AUC, na.rm = TRUE),
median_AUC = median(AUC, na.rm = TRUE),
max_AUC = max(AUC, na.rm = TRUE),
min_AUC = min(AUC, na.rm = TRUE),
sd_AUC = sd(AUC, na.rm = TRUE),
se_AUC = sd_AUC / sqrt(N - 1)
)
# print(X_stats_ALL_L)
write.csv(X_stats_ALL, file = paste(outDir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE)
# Part 3 - Generate summary statistics and calculate the max theoretical L value
# Calculate the Z score at each drug conc for each deletion strain
# Get the background strains - can be modified to take another argument but for most screens will just be YDL227C
Background_Strains <- c("YDL227C")
# First part of loop will go through for each background strain
# In most cases there will only be one YDL227C
for (s in Background_Strains) {
X_Background <- X[X$OrfRep == s, ]
# If there's missing data for the background strains set these values to NA so the 0 values aren't included in summary statistics
# we may want to consider in some cases giving the max high value to L depending on the data type
if (table(X_Background$l)[1] == 0) {
X_Background[X_Background$l == 0, ]$l <- NA
X_Background[X_Background$K == 0, ]$K <- NA
X_Background[X_Background$r == 0, ]$r <- NA
X_Background[X_Background$AUC == 0, ]$AUC <- NA
}
X_Background <- X_Background[!is.na(X_Background$l), ]
# Get summary stats for L, K, R, AUC
X_stats_BY_L <- ddply(
X_Background,
c("OrfRep", "Conc_Num", "Conc_Num_Factor"),
summarise,
N = (length(l)),
mean = mean(l, na.rm = TRUE),
median = median(l, na.rm = TRUE),
max = max(l, na.rm = TRUE),
min = min(l, na.rm = TRUE),
sd = sd(l, na.rm = TRUE),
se = sd / sqrt(N - 1)
)
print(X_stats_BY_L)
X1_SD <- max(X_stats_BY_L$sd)
X_stats_BY_K <- ddply(
X_Background,
c("OrfRep", "Conc_Num", "Conc_Num_Factor"),
summarise,
N = (length(K)),
mean = mean(K, na.rm = TRUE),
median = median(K, na.rm = TRUE),
max = max(K, na.rm = TRUE),
min = min(K, na.rm = TRUE),
sd = sd(K, na.rm = TRUE),
se = sd / sqrt(N - 1)
)
X1_SD_K <- max(X_stats_BY_K$sd)
X_stats_BY_r <- ddply(
X_Background,
c("OrfRep", "Conc_Num", "Conc_Num_Factor"),
summarise,
N = length(r),
mean = mean(r, na.rm = TRUE),
median = median(r, na.rm = TRUE),
max = max(r, na.rm = TRUE),
min = min(r, na.rm = TRUE),
sd = sd(r, na.rm = TRUE),
se = sd / sqrt(N - 1)
)
X1_SD_r <- max(X_stats_BY_r$sd)
X_stats_BY_AUC <- ddply(
X_Background,
c("OrfRep", "Conc_Num", "Conc_Num_Factor"),
summarise,
N = length(AUC),
mean = mean(AUC, na.rm = TRUE),
median = median(AUC, na.rm = TRUE),
max = max(AUC, na.rm = TRUE),
min = min(AUC, na.rm = TRUE),
sd = sd(AUC, na.rm = TRUE),
se = sd / sqrt(N - 1)
)
X1_SD_AUC <- max(X_stats_BY_AUC$sd)
X_stats_BY <- ddply(
X_Background,
c("OrfRep", "Conc_Num", "Conc_Num_Factor"),
summarise,
N = (length(l)),
mean_L = mean(l, na.rm = TRUE),
median_L = median(l, na.rm = TRUE),
max_L = max(l, na.rm = TRUE),
min_L = min(l, na.rm = TRUE),
sd_L = sd(l, na.rm = TRUE),
se_L = sd_L / sqrt(N - 1),
mean_K = mean(K, na.rm = TRUE),
median_K = median(K, na.rm = TRUE),
max_K = max(K, na.rm = TRUE),
min_K = min(K, na.rm = TRUE),
sd_K = sd(K, na.rm = TRUE),
se_K = sd_K / sqrt(N - 1),
mean_r = mean(r, na.rm = TRUE),
median_r = median(r, na.rm = TRUE),
max_r = max(r, na.rm = TRUE),
min_r = min(r, na.rm = TRUE),
sd_r = sd(r, na.rm = TRUE),
se_r = sd_r / sqrt(N - 1),
mean_AUC = mean(AUC, na.rm = TRUE),
median_AUC = median(AUC, na.rm = TRUE),
max_AUC = max(AUC, na.rm = TRUE),
min_L = min(AUC, na.rm = TRUE),
sd_AUC = sd(AUC, na.rm = TRUE),
se_AUC = sd_AUC / sqrt(N - 1)
)
write.csv(X_stats_BY, file = paste(outDir, "SummaryStats_BackgroundStrains.csv"), row.names = FALSE)
# Calculate the max theoretical L values
# Only look for max values when K is within 2SD of the ref strain
for (q in unique(X$Conc_Num_Factor)) {
if (q == 0) {
X_within_2SD_K <-
X[X$Conc_Num_Factor == q, ]
X_within_2SD_K <-
X_within_2SD_K[!is.na(X_within_2SD_K$l), ]
X_stats_TEMP_K <-
X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q, ]
X_within_2SD_K <-
X_within_2SD_K[X_within_2SD_K$K >= (X_stats_TEMP_K$mean[1] - (2 * X_stats_TEMP_K$sd[1])), ]
X_within_2SD_K <-
X_within_2SD_K[X_within_2SD_K$K <= (X_stats_TEMP_K$mean[1] + (2 * X_stats_TEMP_K$sd[1])), ]
X_outside_2SD_K <-
X[X$Conc_Num_Factor == q, ]
X_outside_2SD_K <-
X_outside_2SD_K[!is.na(X_outside_2SD_K$l), ]
# X_outside_2SD_K_Temp <-
# X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q, ]
X_outside_2SD_K <-
X_outside_2SD_K[
X_outside_2SD_K$K <= (X_stats_TEMP_K$mean[1] - (2 * X_stats_TEMP_K$sd[1])) |
X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2 * X_stats_TEMP_K$sd[1])), ]
# X_outside_2SD_K <-
# X_outside_2SD_K[X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])), ]
}
if (q > 0) {
X_within_2SD_K_temp <-
X[X$Conc_Num_Factor == q, ]
X_within_2SD_K_temp <-
X_within_2SD_K_temp[!is.na(X_within_2SD_K_temp$l), ]
X_stats_TEMP_K <-
X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q, ]
X_within_2SD_K_temp <-
X_within_2SD_K_temp[X_within_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] - (2 * X_stats_TEMP_K$sd[1])), ]
X_within_2SD_K_temp <-
X_within_2SD_K_temp[X_within_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] + (2 * X_stats_TEMP_K$sd[1])), ]
X_within_2SD_K <-
rbind(X_within_2SD_K, X_within_2SD_K_temp)
X_outside_2SD_K_temp <-
X[X$Conc_Num_Factor == q, ]
X_outside_2SD_K_temp <-
X_outside_2SD_K_temp[!is.na(X_outside_2SD_K_temp$l), ]
# X_outside_2SD_K_Temp <-
# X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q, ]
X_outside_2SD_K_temp <-
X_outside_2SD_K_temp[
X_outside_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] - (2 * X_stats_TEMP_K$sd[1])) |
X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2 * X_stats_TEMP_K$sd[1])), ]
# X_outside_2SD_K_temp <-
# X_outside_2SD_K_temp[X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) , ]
X_outside_2SD_K <-
rbind(X_outside_2SD_K, X_outside_2SD_K_temp)
}
}
X_stats_BY_L_within_2SD_K <- ddply(
X_within_2SD_K,
c("Conc_Num", "Conc_Num_Factor"),
summarise,
N = (length(l)),
mean = mean(l),
median = median(l),
max = max(l, na.rm = TRUE),
min = min(l, na.rm = TRUE),
sd = sd(l),
se = sd / sqrt(N - 1),
z_max = (max - mean) / sd
)
print(X_stats_BY_L_within_2SD_K)
X1_SD_within_2SD_K <- max(X_stats_BY_L_within_2SD_K$sd)
write.csv(
X_stats_BY_L_within_2SD_K,
file = paste(outDir_QC, "Max_Observed_L_Vals_for_spots_within_2SD_K.csv", sep = ""),
row.names = FALSE
)
X_stats_BY_L_outside_2SD_K <- ddply(
X_outside_2SD_K,
c("Conc_Num", "Conc_Num_Factor"),
summarise,
N = (length(l)),
mean = mean(l),
median = median(l),
max = max(l, na.rm = TRUE),
min = min(l, na.rm = TRUE),
sd = sd(l),
se = sd / sqrt(N - 1)
)
print(X_stats_BY_L_outside_2SD_K)
X1_SD_outside_2SD_K <- max(X_stats_BY_L_outside_2SD_K$sd)
# X1_SD_outside_2SD_K <- X[X$l %in% X1_SD_within_2SD_K$l, ]
Outside_2SD_K_L_vs_K <-
ggplot(X_outside_2SD_K, aes(l, K, color = as.factor(Conc_Num))) +
geom_point(aes(ORF = ORF, Gene = Gene, Delta_Backgrd = Delta_Backgrd), shape = 3) +
ggtitle("Raw L vs K for strains falling outside 2SD of the K mean at each conc") +
theme_publication_legend_right()
pdf(paste(outDir_QC, "Raw_L_vs_K_for_strains_2SD_outside_mean_K.pdf", sep = ""), width = 10, height = 8)
print(Outside_2SD_K_L_vs_K)
dev.off()
pgg <- ggplotly(Outside_2SD_K_L_vs_K)
plotly_path <- paste(outDir_QC, "RawL_vs_K_for_strains_outside_2SD_K.html", sep = "")
saveWidget(pgg, file = plotly_path, selfcontained = TRUE)
Outside_2SD_K_delta_background_vs_K <-
ggplot(X_outside_2SD_K, aes(Delta_Backgrd, K, color = as.factor(Conc_Num))) +
geom_point(aes(l = l, ORF = ORF, Gene = Gene), shape = 3, position = "jitter") +
ggtitle("DeltaBackground vs K for strains falling outside 2SD of the K mean at each conc") +
theme_publication_legend_right()
pdf(paste(outDir_QC, "DeltaBackground_vs_K_for_strains_2SD_outside_mean_K.pdf", sep = ""), width = 10, height = 8)
Outside_2SD_K_delta_background_vs_K
dev.off()
pgg <- ggplotly(Outside_2SD_K_delta_background_vs_K)
# pgg
plotly_path <- paste(outDir_QC, "DeltaBackground_vs_K_for_strains_outside_2SD_K.html", sep = "")
saveWidget(pgg, file = plotly_path, selfcontained = TRUE)
# Get the background strain mean values at the no drug conc to calculate shift
Background_L <- X_stats_BY_L$mean[1]
Background_K <- X_stats_BY_K$mean[1]
Background_r <- X_stats_BY_r$mean[1]
Background_AUC <- X_stats_BY_AUC$mean[1]
# Create empty plots for plotting element
p_l <- ggplot()
p_K <- ggplot()
p_r <- ggplot()
p_AUC <- ggplot()
p_rf_l <- ggplot()
p_rf_K <- ggplot()
p_rf_r <- ggplot()
p_rf_AUC <- ggplot()
# Get only the deletion strains
X2 <- X
X2 <- X2[X2$OrfRep != "YDL227C", ]
# If set to max theoretical value, add a 1 to SM, if not, leave as 0
# SM = Set to Max
X2$SM <- 0
# Set the missing values to the highest theoretical value at each drug conc for L
# Leave other values as 0 for the max/min
for (i in 1:length(unique(X2$Conc_Num))) {
Concentration <- unique(X2$Conc_Num)[i]
X2_temp <- X2[X2$Conc_Num == Concentration, ]
if (Concentration == 0) {
X2_new <- X2_temp
print(paste("Check loop order, conc =", Concentration, sep = " "))
}
if (Concentration > 0) {
try(X2_temp[X2_temp$l == 0 & !is.na(X2_temp$l), ]$l <- X_stats_BY_L_within_2SD_K$max[i])
try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l), ]$SM <- 1)
try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l), ]$l <- X_stats_BY_L_within_2SD_K$max[i])
# X2_temp[X2_temp$K == 0, ]$K <- X_stats_ALL_K$max[i]
# X2_temp[X2_temp$r == 0, ]$r <- X_stats_ALL_r$max[i]
# X2_temp[X2_temp$AUC == 0, ]$AUC <- X_stats_ALL_AUC$max[i]
print(paste("Check loop order, conc =", Concentration, sep = " "))
X2_new <- rbind(X2_new, X2_temp)
}
}
X2 <- X2_new
# Get only the RF strains
X2_RF <- X
X2_RF <- X2_RF[X2_RF$OrfRep == "YDL227C", ]
# If set to max theoretical value, add a 1 to SM, if not, leave as 0
# SM = Set to Max
X2_RF$SM <- 0
# Set the missing values to the highest theoretical value at each drug conc for L
# Leave other values as 0 for the max/min
for (i in 1:length(unique(X2_RF$Conc_Num))) {
Concentration <- unique(X2_RF$Conc_Num)[i]
X2_RF_temp <- X2_RF[X2_RF$Conc_Num == Concentration, ]
if (Concentration == 0) {
X2_RF_new <- X2_RF_temp
print(paste("Check loop order, conc =", Concentration, sep = " "))
}
if (Concentration > 0) {
try(X2_RF_temp[X2_RF_temp$l == 0 & !is.na(X2_RF_temp$l), ]$l <- X_stats_BY_L_within_2SD_K$max[i])
try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l), ]$SM <- 1)
try(X2_RF_temp[X2_RF_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_RF_temp$l), ]$l <- X_stats_BY_L_within_2SD_K$max[i])
print(paste("If error, refs have no L values outside theoretical max L, for REFs, conc =", Concentration, sep = " "))
X2_RF_new <- rbind(X2_RF_new, X2_RF_temp)
}
}
X2_RF <- X2_RF_new
# Part 4 Get the RF Z score values
# Change the OrfRep Column to include the RF strain, the Gene name and the Num. so each RF gets its own score
X2_RF$OrfRep <- paste(X2_RF$OrfRep, X2_RF$Gene, X2_RF$Num., sep = "_")
num_genes_RF <- length(unique(X2_RF$OrfRep))
# print(num_genes_RF)
# Create the output data.frame containing columns for each RF strain
InteractionScores_RF <- unique(X2_RF["OrfRep"])
# InteractionScores_RF$Gene <- unique(X2$Gene)
InteractionScores_RF$Gene <- NA
InteractionScores_RF$Raw_Shift_L <- NA
InteractionScores_RF$Z_Shift_L <- NA
InteractionScores_RF$lm_Score_L <- NA
InteractionScores_RF$Z_lm_L <- NA
InteractionScores_RF$R_Squared_L <- NA
InteractionScores_RF$Sum_Z_Score_L <- NA
InteractionScores_RF$Avg_Zscore_L <- NA
InteractionScores_RF$Raw_Shift_K <- NA
InteractionScores_RF$Z_Shift_K <- NA
InteractionScores_RF$lm_Score_K <- NA
InteractionScores_RF$Z_lm_K <- NA
InteractionScores_RF$R_Squared_K <- NA
InteractionScores_RF$Sum_Z_Score_K <- NA
InteractionScores_RF$Avg_Zscore_K <- NA
InteractionScores_RF$Raw_Shift_r <- NA
InteractionScores_RF$Z_Shift_r <- NA
InteractionScores_RF$lm_Score_r <- NA
InteractionScores_RF$Z_lm_r <- NA
InteractionScores_RF$R_Squared_r <- NA
InteractionScores_RF$Sum_Z_Score_r <- NA
InteractionScores_RF$Avg_Zscore_r <- NA
InteractionScores_RF$Raw_Shift_AUC <- NA
InteractionScores_RF$Z_Shift_AUC <- NA
InteractionScores_RF$lm_Score_AUC <- NA
InteractionScores_RF$Z_lm_AUC <- NA
InteractionScores_RF$R_Squared_AUC <- NA
InteractionScores_RF$Sum_Z_Score_AUC <- NA
InteractionScores_RF$Avg_Zscore_AUC <- NA
InteractionScores_RF$NG <- NA
InteractionScores_RF$SM <- NA
for (i in 1:num_genes_RF) {
# Get each deletion strain ORF
Gene_Sel <- unique(X2_RF$OrfRep)[i]
# Extract only the current deletion strain and its data
X_Gene_Sel <- X2_RF[X2_RF$OrfRep == Gene_Sel, ]
X_stats_interaction <- ddply(
X_Gene_Sel,
c("OrfRep", "Gene", "Conc_Num", "Conc_Num_Factor"),
summarise,
N = (length(l)),
mean_L = mean(l, na.rm = TRUE),
median_L = median(l, na.rm = TRUE),
sd_L = sd(l, na.rm = TRUE),
se_L = sd_L / sqrt(N - 1),
mean_K = mean(K, na.rm = TRUE),
median_K = median(K, na.rm = TRUE),
sd_K = sd(K, na.rm = TRUE),
se_K = sd_K / sqrt(N - 1),
mean_r = mean(r, na.rm = TRUE),
median_r = median(r, na.rm = TRUE),
sd_r = sd(r, na.rm = TRUE),
se_r = sd_r / sqrt(N - 1),
mean_AUC = mean(AUC, na.rm = TRUE),
median_AUC = median(AUC, na.rm = TRUE),
sd_AUC = sd(AUC, na.rm = TRUE),
se_AUC = sd_AUC / sqrt(N - 1),
NG = sum(NG, na.rm = TRUE),
DB = sum(DB, na.rm = TRUE),
SM = sum(SM, na.rm = TRUE)
)
# Get shift vals
# if L is 0, that means the no growth on no drug
# if L is NA at 0, that means the spot was removed due to contamination
# if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift
# otherwise calculate shift at no drug conc
if (is.na(X_stats_interaction$mean_L[1]) || X_stats_interaction$mean_L[1] == 0) {
X_stats_interaction$Raw_Shift_L <- 0
X_stats_interaction$Raw_Shift_K <- 0
X_stats_interaction$Raw_Shift_r <- 0
X_stats_interaction$Raw_Shift_AUC <- 0
X_stats_interaction$Z_Shift_L <- 0
X_stats_interaction$Z_Shift_K <- 0
X_stats_interaction$Z_Shift_r <- 0
X_stats_interaction$Z_Shift_AUC <- 0
} else {
X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L
X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K
X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r
X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC
X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1] / X_stats_BY_L$sd[1]
X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1] / X_stats_BY_K$sd[1]
X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1] / X_stats_BY_r$sd[1]
X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1] / X_stats_BY_AUC$sd[1]
}
# Get WT vals
X_stats_interaction$WT_l <- X_stats_BY_L$mean
X_stats_interaction$WT_K <- X_stats_BY_K$mean
X_stats_interaction$WT_r <- X_stats_BY_r$mean
X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean
# Get WT SD
X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd
X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd
X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd
X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd
# Only get scores if there's growth at no drug
if (X_stats_interaction$mean_L[1] != 0 && !is.na(X_stats_interaction$mean_L[1])) {
# Calculate expected values
X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L
X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K
X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r
X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC
# Calculate normalized delta values
X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L
X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K
X_stats_interaction$Delta_r <- X_stats_interaction$mean_r - X_stats_interaction$Exp_r
X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC
# Disregard shift for no growth values in Z score calculation
if (sum(X_stats_interaction$NG, na.rm = TRUE) > 0) {
X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_L <-
X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_l
X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_K <-
X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_K
X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_r <-
X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_r
X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_AUC <-
X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_AUC
}
# Disregard shift for set to max values in Z score calculation
if (sum(X_stats_interaction$SM, na.rm = TRUE) > 0) {
X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_L <-
X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_l
# Only calculate the L value without shift since L is the only adjusted value
# X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_K <-
# X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_K
# X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_r <-
# X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_r
# X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_AUC <-
# X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_AU
}
# Calculate Z score at each concentration
X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L) / (X_stats_interaction$WT_sd_l)
X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K) / (X_stats_interaction$WT_sd_K)
X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r) / (X_stats_interaction$WT_sd_r)
X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC) / (X_stats_interaction$WT_sd_AUC)
# Get linear model
gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor, data = X_stats_interaction)
gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor, data = X_stats_interaction)
gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor, data = X_stats_interaction)
gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor, data = X_stats_interaction)
# Get interaction score calculated by linear model and R-squared value for the fit
gene_interaction_L <- MAX_CONC * (gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1]
r_squared_l <- summary(gene_lm_L)$r.squared
gene_interaction_K <- MAX_CONC * (gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1]
r_squared_K <- summary(gene_lm_K)$r.squared
gene_interaction_r <- MAX_CONC * (gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1]
r_squared_r <- summary(gene_lm_r)$r.squared
gene_interaction_AUC <- MAX_CONC * (gene_lm_r$coefficients[2]) + gene_lm_AUC$coefficients[1]
r_squared_AUC <- summary(gene_lm_AUC)$r.squared
# Get total of non removed values
Num_non_Removed_Conc <- total_conc_nums - sum(X_stats_interaction$DB, na.rm = TRUE) - 1
# Report the scores
InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <-
X_Gene_Sel$OrfRep[1]
InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <-
X_Gene_Sel$Gene[1]
InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <-
X_stats_interaction$Raw_Shift_L[1]
InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <-
X_stats_interaction$Z_Shift_L[1]
InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <-
gene_interaction_L
InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <-
r_squared_l
InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <-
sum(X_stats_interaction$Zscore_L, na.rm = TRUE)
InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <-
sum(X_stats_interaction$Zscore_L, na.rm = TRUE) / (Num_non_Removed_Conc)
InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <-
X_stats_interaction$Raw_Shift_K[1]
InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <-
X_stats_interaction$Z_Shift_K[1]
InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <-
gene_interaction_K
InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <-
r_squared_K
InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <-
sum(X_stats_interaction$Zscore_K, na.rm = TRUE)
InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <-
sum(X_stats_interaction$Zscore_K, na.rm = TRUE) / (Num_non_Removed_Conc)
InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <-
X_stats_interaction$Raw_Shift_r[1]
InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <-
X_stats_interaction$Z_Shift_r[1]
InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <-
gene_interaction_r
InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <-
r_squared_r
InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <-
sum(X_stats_interaction$Zscore_r, na.rm = TRUE)
InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <-
sum(X_stats_interaction$Zscore_r, na.rm = TRUE) / (total_conc_nums - 1)
InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <-
X_stats_interaction$Raw_Shift_AUC[1]
InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <-
X_stats_interaction$Z_Shift_AUC[1]
InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <-
gene_interaction_AUC
InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <-
r_squared_AUC
InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <-
sum(X_stats_interaction$Zscore_AUC, na.rm = TRUE)
InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <-
sum(X_stats_interaction$Zscore_AUC, na.rm = TRUE) / (total_conc_nums - 1)
}
if (X_stats_interaction$mean_L[1] == 0 || is.na(X_stats_interaction$mean_L[1])) {
# Calculate expected values
X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L
X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K
X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r
X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC
# Calculate normalized delta values
X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L
X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K
X_stats_interaction$Delta_r <- X_stats_interaction$mean_r - X_stats_interaction$Exp_r
X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC
# Disregard shift for missing values in Z score calculation
if (sum(X_stats_interaction$NG, na.rm = TRUE) > 0) {
X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_L <-
X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_l
X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_K <-
X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_K
X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_r <-
X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_r
X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_AUC <-
X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_AUC
}
# Disregard shift for set to max values in Z score calculation
if (sum(X_stats_interaction$SM, na.rm = TRUE) > 0) {
X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_L <-
X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_l
# Only calculate the L value without shift since L is the only adjusted value
# X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_K <-
# X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_K
# X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_r <-
# X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_r
# X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_AUC <-
# X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_AUC
}
# Calculate Z score at each concentration
X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L) / (X_stats_interaction$WT_sd_l)
X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K) / (X_stats_interaction$WT_sd_K)
X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r) / (X_stats_interaction$WT_sd_r)
X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC) / (X_stats_interaction$WT_sd_AUC)
# NA values for the next part since there's an NA or 0 at the no drug.
gene_lm_L <- NA
gene_lm_K <- NA
gene_lm_r <- NA
gene_lm_AUC <- NA
gene_interaction_L <- NA
r_squared_l <- NA
gene_interaction_K <- NA
r_squared_K <- NA
gene_interaction_r <- NA
r_squared_r <- NA
gene_interaction_AUC <- NA
r_squared_AUC <- NA
X_stats_interaction$Raw_Shift_L <- NA
X_stats_interaction$Raw_Shift_K <- NA
X_stats_interaction$Raw_Shift_r <- NA
X_stats_interaction$Raw_Shift_AUC <- NA
X_stats_interaction$Z_Shift_L <- NA
X_stats_interaction$Z_Shift_K <- NA
X_stats_interaction$Z_Shift_r <- NA
X_stats_interaction$Z_Shift_AUC <- NA
InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1]
InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1]
InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1]
InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1]
InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L
InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l
InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA
InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA
InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1]
InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1]
InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K
InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K
InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA
InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA
InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1]
InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1]
InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r
InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r
InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA
InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA
InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1]
InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1]
InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC
InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC
InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA
InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA
}
if (i == 1) {
X_stats_interaction_ALL_RF <- X_stats_interaction
}
if (i > 1) {
X_stats_interaction_ALL_RF <- rbind(X_stats_interaction_ALL_RF, X_stats_interaction)
}
InteractionScores_RF$NG[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG, na.rm = TRUE)
InteractionScores_RF$DB[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB, na.rm = TRUE)
InteractionScores_RF$SM[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM, na.rm = TRUE)
# X_stats_L_int_temp <- rbind(X_stats_L_int_temp, X_stats_L_int)
}
print("Pass RF Calculation loop")
lm_sd_L <- sd(InteractionScores_RF$lm_Score_L, na.rm = TRUE)
lm_sd_K <- sd(InteractionScores_RF$lm_Score_K, na.rm = TRUE)
lm_sd_r <- sd(InteractionScores_RF$lm_Score_r, na.rm = TRUE)
lm_sd_AUC <- sd(InteractionScores_RF$lm_Score_AUC, na.rm = TRUE)
lm_mean_L <- mean(InteractionScores_RF$lm_Score_L, na.rm = TRUE)
lm_mean_K <- mean(InteractionScores_RF$lm_Score_K, na.rm = TRUE)
lm_mean_r <- mean(InteractionScores_RF$lm_Score_r, na.rm = TRUE)
lm_mean_AUC <- mean(InteractionScores_RF$lm_Score_AUC, na.rm = TRUE)
print(paste("Mean RF linear regression score L", lm_mean_L))
InteractionScores_RF$Z_lm_L <- (InteractionScores_RF$lm_Score_L - lm_mean_L) / (lm_sd_L)
InteractionScores_RF$Z_lm_K <- (InteractionScores_RF$lm_Score_K - lm_mean_K) / (lm_sd_K)
InteractionScores_RF$Z_lm_r <- (InteractionScores_RF$lm_Score_r - lm_mean_r) / (lm_sd_r)
InteractionScores_RF$Z_lm_AUC <- (InteractionScores_RF$lm_Score_AUC - lm_mean_AUC) / (lm_sd_AUC)
InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$Z_lm_L, decreasing = TRUE), ]
InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$NG, decreasing = TRUE), ]
write.csv(InteractionScores_RF, paste(outDir, "RF_ZScores_Interaction.csv", sep = ""), row.names = FALSE)
for (i in 1:num_genes_RF) {
Gene_Sel <- unique(InteractionScores_RF$OrfRep)[i]
X_ZCalculations <- X_stats_interaction_ALL_RF[X_stats_interaction_ALL_RF$OrfRep == Gene_Sel, ]
X_Int_Scores <- InteractionScores_RF[InteractionScores_RF$OrfRep == Gene_Sel, ]
p_rf_l[[i]] <- ggplot(X_ZCalculations, aes(Conc_Num_Factor, Delta_L)) +
geom_point() +
geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
coord_cartesian(ylim = c(-65, 65)) +
geom_errorbar(aes(ymin = 0 - (2 * WT_sd_l), ymax = 0 + (2 * WT_sd_l)), alpha = 0.3) +
ggtitle(paste(X_ZCalculations$OrfRep[1], X_ZCalculations$Gene[1], sep = " ")) +
annotate("text", x = 1, y = 45, label = paste("ZShift =", round(X_Int_Scores$Z_Shift_L, 2))) +
scale_color_discrete(guide = FALSE) +
# annotate("text", x = 1, y = 35, label = paste("Avg Zscore =", round(X_Int_Scores$Avg_Zscore_L, 2))) +
annotate("text", x = 1, y = 25, label = paste("lm Zscore =", round(X_Int_Scores$Z_lm_L, 2))) +
annotate("text", x = 1, y = -25, label = paste("NG =", X_Int_Scores$NG)) +
annotate("text", x = 1, y = -35, label = paste("DB =", X_Int_Scores$DB)) +
annotate("text", x = 1, y = -45, label = paste("SM =", X_Int_Scores$SM)) +
scale_x_continuous(
name = unique(X$Drug[1]),
breaks = unique(X_ZCalculations$Conc_Num_Factor),
labels = unique(as.character(X_ZCalculations$Conc_Num))
) +
scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) +
theme_publication()
p_rf_K[[i]] <- ggplot(
X_ZCalculations, aes(Conc_Num_Factor, Delta_K)) +
geom_point() +
geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
coord_cartesian(ylim = c(-65, 65)) +
geom_errorbar(aes(ymin = 0 - (2 * WT_sd_K), ymax = 0 + (2 * WT_sd_K)), alpha = 0.3) +
ggtitle(paste(X_ZCalculations$OrfRep[1], X_ZCalculations$Gene[1], sep = " ")) +
annotate("text", x = 1, y = 45, label = paste("ZShift =", round(X_Int_Scores$Z_Shift_K, 2))) +
scale_color_discrete(guide = FALSE) +
# annotate("text", x = 1, y = 35, label = paste("Avg ZScore =", round(X_Int_Scores$Avg_Zscore_K, 2))) +
annotate("text", x = 1, y = 25, label = paste("lm ZScore =", round(X_Int_Scores$Z_lm_L, 2))) +
annotate("text", x = 1, y = -25, label = paste("NG =", X_Int_Scores$NG)) +
annotate("text", x = 1, y = -35, label = paste("DB =", X_Int_Scores$DB)) +
annotate("text", x = 1, y = -45, label = paste("SM =", X_Int_Scores$SM)) +
scale_x_continuous(
name = unique(X$Drug[1]),
breaks = unique(X_ZCalculations$Conc_Num_Factor),
labels = unique(as.character(X_ZCalculations$Conc_Num))
) +
scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) +
theme_publication()
p_rf_r[[i]] <- ggplot(
X_ZCalculations, aes(Conc_Num_Factor, Delta_r)) +
geom_point() +
geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
coord_cartesian(ylim = c(-0.65, 0.65)) +
geom_errorbar(aes(ymin = 0 - (2 * WT_sd_r), ymax = 0 + (2 * WT_sd_r)), alpha = 0.3) +
ggtitle(paste(X_ZCalculations$OrfRep[1], X_ZCalculations$Gene[1], sep = " ")) +
annotate("text", x = 1, y = 0.45, label = paste("ZShift =", round(X_Int_Scores$Z_Shift_r, 2))) +
scale_color_discrete(guide = FALSE) +
# annotate("text", x = 1, y = 0.35, label = paste("Avg ZScore =", round(X_Int_Scores$Avg_Zscore_r, 2))) +
annotate("text", x = 1, y = 0.25, label = paste("lm ZScore =", round(X_Int_Scores$Z_lm_r, 2))) +
annotate("text", x = 1, y = -0.25, label = paste("NG =", X_Int_Scores$NG)) +
annotate("text", x = 1, y = -0.35, label = paste("DB =", X_Int_Scores$DB)) +
annotate("text", x = 1, y = -0.45, label = paste("SM =", X_Int_Scores$SM)) +
scale_x_continuous(
name = unique(X$Drug[1]),
breaks = unique(X_ZCalculations$Conc_Num_Factor),
labels = unique(as.character(X_ZCalculations$Conc_Num))
) +
scale_y_continuous(breaks = c(-0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6)) +
theme_publication()
p_rf_AUC[[i]] <- ggplot(
X_ZCalculations, aes(Conc_Num_Factor, Delta_AUC)) +
geom_point() +
geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
coord_cartesian(ylim = c(-6500, 6500)) +
geom_errorbar(aes(ymin = 0 - (2 * WT_sd_AUC), ymax = 0 + (2 * WT_sd_AUC)), alpha = 0.3) +
ggtitle(paste(X_ZCalculations$OrfRep[1], X_ZCalculations$Gene[1], sep = " ")) +
annotate("text", x = 1, y = 4500, label = paste("ZShift =", round(X_Int_Scores$Z_Shift_AUC, 2))) +
scale_color_discrete(guide = FALSE) +
# annotate("text", x = 1, y = 3500, label = paste("Avg ZScore =", round(X_Int_Scores$Avg_Zscore_AUC, 2))) +
annotate("text", x = 1, y = 2500, label = paste("lm ZScore =", round(X_Int_Scores$Z_lm_AUC, 2))) +
annotate("text", x = 1, y = -2500, label = paste("NG =", X_Int_Scores$NG)) +
annotate("text", x = 1, y = -3500, label = paste("DB =", X_Int_Scores$DB)) +
annotate("text", x = 1, y = -4500, label = paste("SM =", X_Int_Scores$SM)) +
scale_x_continuous(
name = unique(X$Drug[1]),
breaks = unique(X_ZCalculations$Conc_Num_Factor),
labels = unique(as.character(X_ZCalculations$Conc_Num))
) +
scale_y_continuous(breaks = c(-6000, -5000, -4000, -3000, -2000, -1000, 0, 1000, 2000, 3000, 4000, 5000, 6000)) +
theme_publication()
if (i == 1) {
X_stats_interaction_ALL_RF_final <- X_ZCalculations
}
if (i > 1) {
X_stats_interaction_ALL_RF_final <- rbind(X_stats_interaction_ALL_RF_final, X_ZCalculations)
}
}
print("Pass RF ggplot loop")
write.csv(X_stats_interaction_ALL_RF_final, paste(outDir, "RF_ZScore_Calculations.csv", sep = ""), row.names = FALSE)
# Part 5 - Get Zscores for Gene deletion strains
# Get total number of genes for the next loop
num_genes <- length(unique(X2$OrfRep))
# print(num_genes)
# Create the output data.frame containing columns for each deletion strain
InteractionScores <- unique(X2["OrfRep"])
# InteractionScores$Gene <- unique(X2$Gene)
InteractionScores$Gene <- NA
InteractionScores$Raw_Shift_L <- NA
InteractionScores$Z_Shift_L <- NA
InteractionScores$lm_Score_L <- NA
InteractionScores$Z_lm_L <- NA
InteractionScores$R_Squared_L <- NA
InteractionScores$Sum_Z_Score_L <- NA
InteractionScores$Avg_Zscore_L <- NA
InteractionScores$Raw_Shift_K <- NA
InteractionScores$Z_Shift_K <- NA
InteractionScores$lm_Score_K <- NA
InteractionScores$Z_lm_K <- NA
InteractionScores$R_Squared_K <- NA
InteractionScores$Sum_Z_Score_K <- NA
InteractionScores$Avg_Zscore_K <- NA
InteractionScores$Raw_Shift_r <- NA
InteractionScores$Z_Shift_r <- NA
InteractionScores$lm_Score_r <- NA
InteractionScores$Z_lm_r <- NA
InteractionScores$R_Squared_r <- NA
InteractionScores$Sum_Z_Score_r <- NA
InteractionScores$Avg_Zscore_r <- NA
InteractionScores$Raw_Shift_AUC <- NA
InteractionScores$Z_Shift_AUC <- NA
InteractionScores$lm_Score_AUC <- NA
InteractionScores$Z_lm_AUC <- NA
InteractionScores$R_Squared_AUC <- NA
InteractionScores$Sum_Z_Score_AUC <- NA
InteractionScores$Avg_Zscore_AUC <- NA
InteractionScores$NG <- NA
InteractionScores$DB <- NA
InteractionScores$SM <- NA
for (i in 1:num_genes) {
# Get each deletion strain ORF
Gene_Sel <- unique(X2$OrfRep)[i]
# Extract only the current deletion strain and its data
X_Gene_Sel <- X2[X2$OrfRep == Gene_Sel, ]
X_stats_interaction <- ddply(
X_Gene_Sel,
c("OrfRep", "Gene", "Conc_Num", "Conc_Num_Factor"),
summarise,
N = (length(l)),
mean_L = mean(l, na.rm = TRUE),
median_L = median(l, na.rm = TRUE),
sd_L = sd(l, na.rm = TRUE),
se_L = sd_L / sqrt(N - 1),
mean_K = mean(K, na.rm = TRUE),
median_K = median(K, na.rm = TRUE),
sd_K = sd(K, na.rm = TRUE),
se_K = sd_K / sqrt(N - 1),
mean_r = mean(r, na.rm = TRUE),
median_r = median(r, na.rm = TRUE),
sd_r = sd(r, na.rm = TRUE),
se_r = sd_r / sqrt(N - 1),
mean_AUC = mean(AUC, na.rm = TRUE),
median_AUC = median(AUC, na.rm = TRUE),
sd_AUC = sd(AUC, na.rm = TRUE),
se_AUC = sd_AUC / sqrt(N - 1),
NG = sum(NG, na.rm = TRUE),
DB = sum(DB, na.rm = TRUE),
SM = sum(SM, na.rm = TRUE)
)
# Get shift vals
# if L is 0, that means the no growth on no drug
# if L is NA at 0, that means the spot was removed due to contamination
# if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift
# otherwise calculate shift at no drug conc
if (is.na(X_stats_interaction$mean_L[1]) || X_stats_interaction$mean_L[1] == 0) {
X_stats_interaction$Raw_Shift_L <- 0
X_stats_interaction$Raw_Shift_K <- 0
X_stats_interaction$Raw_Shift_r <- 0
X_stats_interaction$Raw_Shift_AUC <- 0
X_stats_interaction$Z_Shift_L <- 0
X_stats_interaction$Z_Shift_K <- 0
X_stats_interaction$Z_Shift_r <- 0
X_stats_interaction$Z_Shift_AUC <- 0
} else {
X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L
X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K
X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r
X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC
X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1] / X_stats_BY_L$sd[1]
X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1] / X_stats_BY_K$sd[1]
X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1] / X_stats_BY_r$sd[1]
X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1] / X_stats_BY_AUC$sd[1]
}
# Get WT vals
X_stats_interaction$WT_l <- X_stats_BY_L$mean
X_stats_interaction$WT_K <- X_stats_BY_K$mean
X_stats_interaction$WT_r <- X_stats_BY_r$mean
X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean
# Get WT SD
X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd
X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd
X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd
X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd
# Only get scores if there's growth at no drug
if (X_stats_interaction$mean_L[1] != 0 && !is.na(X_stats_interaction$mean_L[1])) {
# Calculate expected values
X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L
X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K
X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r
X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC
# Calculate normalized delta values
X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L
X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K
X_stats_interaction$Delta_r <- X_stats_interaction$mean_r - X_stats_interaction$Exp_r
X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC
# Disregard shift for no growth values in Z score calculation
if (sum(X_stats_interaction$NG, na.rm = TRUE) > 0) {
X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_L <-
X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_l
X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_K <-
X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_K
X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_r <-
X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_r
X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_AUC <-
X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_AUC
}
# Disregard shift for set to max values in Z score calculation
if (sum(X_stats_interaction$SM, na.rm = TRUE) > 0) {
X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_L <-
X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_l
# Only calculate the L value without shift since L is the only adjusted value
# X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_K <-
# X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_K
# X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_r <-
# X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_r
# X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_AUC <-
# X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_AUC
}
# Calculate Z score at each concentration
X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L) / (X_stats_interaction$WT_sd_l)
X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K) / (X_stats_interaction$WT_sd_K)
X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r) / (X_stats_interaction$WT_sd_r)
X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC) / (X_stats_interaction$WT_sd_AUC)
# Get linear model
gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor, data = X_stats_interaction)
gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor, data = X_stats_interaction)
gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor, data = X_stats_interaction)
gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor, data = X_stats_interaction)
# Get interaction score calculated by linear model and R-squared value for the fit
gene_interaction_L <- MAX_CONC * (gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1]
r_squared_l <- summary(gene_lm_L)$r.squared
gene_interaction_K <- MAX_CONC * (gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1]
r_squared_K <- summary(gene_lm_K)$r.squared
gene_interaction_r <- MAX_CONC * (gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1]
r_squared_r <- summary(gene_lm_r)$r.squared
gene_interaction_AUC <- MAX_CONC * (gene_lm_r$coefficients[2]) + gene_lm_AUC$coefficients[1]
r_squared_AUC <- summary(gene_lm_AUC)$r.squared
# Get total of non removed values
Num_non_Removed_Conc <- total_conc_nums - sum(X_stats_interaction$DB, na.rm = TRUE) - 1
# Report the scores
InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <-
as.character(X_Gene_Sel$OrfRep[1])
InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <-
as.character(X_Gene_Sel$Gene[1])
InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <-
X_stats_interaction$Raw_Shift_L[1]
InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <-
X_stats_interaction$Z_Shift_L[1]
InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <-
gene_interaction_L
InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <-
(gene_interaction_L - lm_mean_L) / lm_sd_L
InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <-
r_squared_l
InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <-
sum(X_stats_interaction$Zscore_L, na.rm = TRUE)
InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <-
sum(X_stats_interaction$Zscore_L, na.rm = TRUE) / (Num_non_Removed_Conc)
InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <-
X_stats_interaction$Raw_Shift_K[1]
InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <-
X_stats_interaction$Z_Shift_K[1]
InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <-
gene_interaction_K
InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <-
(gene_interaction_K - lm_mean_K) / lm_sd_K
InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <-
r_squared_K
InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <-
sum(X_stats_interaction$Zscore_K, na.rm = TRUE)
InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <-
sum(X_stats_interaction$Zscore_K, na.rm = TRUE) / (Num_non_Removed_Conc)
InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <-
X_stats_interaction$Raw_Shift_r[1]
InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <-
X_stats_interaction$Z_Shift_r[1]
InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <-
gene_interaction_r
InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <-
(gene_interaction_r - lm_mean_r) / lm_sd_r
InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <-
r_squared_r
InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <-
sum(X_stats_interaction$Zscore_r, na.rm = TRUE)
InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <-
sum(X_stats_interaction$Zscore_r, na.rm = TRUE) / (total_conc_nums - 1)
InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <-
X_stats_interaction$Raw_Shift_AUC[1]
InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <-
X_stats_interaction$Z_Shift_AUC[1]
InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <-
gene_interaction_AUC
InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <-
(gene_interaction_AUC - lm_mean_AUC) / lm_sd_AUC
InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <-
r_squared_AUC
InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <-
sum(X_stats_interaction$Zscore_AUC, na.rm = TRUE)
InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <-
sum(X_stats_interaction$Zscore_AUC, na.rm = TRUE) / (total_conc_nums - 1)
}
if (X_stats_interaction$mean_L[1] == 0 || is.na(X_stats_interaction$mean_L[1])) {
# Calculate expected values
X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L
X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K
X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r
X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC
# Calculate normalized delta values
X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L
X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K
X_stats_interaction$Delta_r <- X_stats_interaction$mean_r - X_stats_interaction$Exp_r
X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC
# Disregard shift for missing values in Z score calculatiom
if (sum(X_stats_interaction$NG, na.rm = TRUE) > 0) {
X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_L <-
X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_l
X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_K <-
X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_K
X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_r <-
X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_r
X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_AUC <-
X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_AUC
}
# Disregard shift for set to max values in Z score calculation
if (sum(X_stats_interaction$SM, na.rm = TRUE) > 0) {
X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_L <-
X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_l
# Only calculate the L value without shift since L is the only adjusted value
# X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_K <-
# X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_K
# X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_r <-
# X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_r
# X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_AUC <-
# X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_AUC
}
# Calculate Z score at each concentration
X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L) / (X_stats_interaction$WT_sd_l)
X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K) / (X_stats_interaction$WT_sd_K)
X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r) / (X_stats_interaction$WT_sd_r)
X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC) / (X_stats_interaction$WT_sd_AUC)
# NA values for the next part since there's an NA or 0 at the no drug.
gene_lm_L <- NA
gene_lm_K <- NA
gene_lm_r <- NA
gene_interaction_L <- NA
r_squared_l <- NA
gene_interaction_K <- NA
r_squared_K <- NA
gene_interaction_r <- NA
r_squared_r <- NA
X_stats_interaction$Raw_Shift_L <- NA
X_stats_interaction$Raw_Shift_K <- NA
X_stats_interaction$Raw_Shift_r <- NA
X_stats_interaction$Raw_Shift_AUC <- NA
X_stats_interaction$Z_Shift_L <- NA
X_stats_interaction$Z_Shift_K <- NA
X_stats_interaction$Z_Shift_r <- NA
X_stats_interaction$Z_Shift_AUC <- NA
InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1])
InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1])
InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1]
InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1]
InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l
InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1]
InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1]
InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K
InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1]
InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1]
InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r
InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1]
InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1]
InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC
InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA
InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA
}
if (i == 1) {
X_stats_interaction_ALL <- X_stats_interaction
}
if (i > 1) {
X_stats_interaction_ALL <- rbind(X_stats_interaction_ALL, X_stats_interaction)
}
InteractionScores$NG[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG, na.rm = TRUE)
InteractionScores$DB[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB, na.rm = TRUE)
InteractionScores$SM[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM, na.rm = TRUE)
# X_stats_L_int_temp <- rbind(X_stats_L_int_temp, X_stats_L_int)
}
print("Pass Int Calculation loop")
InteractionScores <- InteractionScores[order(InteractionScores$Z_lm_L, decreasing = TRUE), ]
InteractionScores <- InteractionScores[order(InteractionScores$NG, decreasing = TRUE), ]
df_order_by_OrfRep <- unique(InteractionScores$OrfRep)
# X_stats_interaction_ALL <- X_stats_interaction_ALL[order(X_stats_interaction_ALL$NG, decreasing = TRUE), ]
write.csv(InteractionScores, paste(outDir, "ZScores_Interaction.csv", sep = ""), row.names = FALSE)
InteractionScores_deletion_enhancers_L <-
InteractionScores[InteractionScores$Avg_Zscore_L >= 2, ]
InteractionScores_deletion_enhancers_K <-
InteractionScores[InteractionScores$Avg_Zscore_K <= -2, ]
InteractionScores_deletion_suppressors_L <-
InteractionScores[InteractionScores$Avg_Zscore_L <= -2, ]
InteractionScores_deletion_suppressors_K <-
InteractionScores[InteractionScores$Avg_Zscore_K >= 2, ]
InteractionScores_deletion_enhancers_and_Suppressors_L <-
InteractionScores[InteractionScores$Avg_Zscore_L >= 2 | InteractionScores$Avg_Zscore_L <= -2, ]
InteractionScores_deletion_enhancers_and_Suppressors_K <-
InteractionScores[InteractionScores$Avg_Zscore_K >= 2 | InteractionScores$Avg_Zscore_K <= -2, ]
InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <-
InteractionScores[InteractionScores$Z_lm_L >= 2 & InteractionScores$Avg_Zscore_L <= -2, ]
InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <-
InteractionScores[InteractionScores$Z_lm_L <= -2 & InteractionScores$Avg_Zscore_L >= 2, ]
InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <-
InteractionScores[InteractionScores$Z_lm_K <= -2 & InteractionScores$Avg_Zscore_K >= 2, ]
InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <-
InteractionScores[InteractionScores$Z_lm_K >= 2 & InteractionScores$Avg_Zscore_K <= -2, ]
InteractionScores_deletion_enhancers_L <-
InteractionScores_deletion_enhancers_L[
!is.na(InteractionScores_deletion_enhancers_L$OrfRep), ]
InteractionScores_deletion_enhancers_K <-
InteractionScores_deletion_enhancers_K[
!is.na(InteractionScores_deletion_enhancers_K$OrfRep), ]
InteractionScores_deletion_suppressors_L <-
InteractionScores_deletion_suppressors_L[
!is.na(InteractionScores_deletion_suppressors_L$OrfRep), ]
InteractionScores_deletion_suppressors_K <-
InteractionScores_deletion_suppressors_K[
!is.na(InteractionScores_deletion_suppressors_K$OrfRep), ]
InteractionScores_deletion_enhancers_and_Suppressors_L <-
InteractionScores_deletion_enhancers_and_Suppressors_L[
!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L$OrfRep), ]
InteractionScores_deletion_enhancers_and_Suppressors_K <-
InteractionScores_deletion_enhancers_and_Suppressors_K[
!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K$OrfRep), ]
InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <-
InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L[
!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L$OrfRep), ]
InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <-
InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L[
!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L$OrfRep), ]
InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <-
InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K[
!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K$OrfRep), ]
InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <-
InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K[
!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K$OrfRep), ]
write.csv(InteractionScores_deletion_enhancers_L,
paste(outDir, "ZScores_Interaction_DeletionEnhancers_L.csv", sep = ""), row.names = FALSE)
write.csv(InteractionScores_deletion_enhancers_K,
paste(outDir, "ZScores_Interaction_DeletionEnhancers_K.csv", sep = ""), row.names = FALSE)
write.csv(InteractionScores_deletion_suppressors_L,
paste(outDir, "ZScores_Interaction_DeletionSuppressors_L.csv", sep = ""), row.names = FALSE)
write.csv(InteractionScores_deletion_suppressors_K,
paste(outDir, "ZScores_Interaction_DeletionSuppressors_K.csv", sep = ""), row.names = FALSE)
write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L,
paste(outDir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv", sep = ""), row.names = FALSE)
write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K,
paste(outDir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv", sep = ""), row.names = FALSE)
write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L,
paste(outDir, "ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv", sep = ""), row.names = FALSE)
write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L,
paste(outDir, "ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv", sep = ""), row.names = FALSE)
write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K,
paste(outDir, "ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv", sep = ""), row.names = FALSE)
write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K,
paste(outDir, "ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv", sep = ""), row.names = FALSE)
# Get enhancers and suppressors for linear regression
InteractionScores_deletion_enhancers_L_lm <-
InteractionScores[InteractionScores$Z_lm_L >= 2, ]
InteractionScores_deletion_enhancers_K_lm <-
InteractionScores[InteractionScores$Z_lm_K <= -2, ]
InteractionScores_deletion_suppressors_L_lm <-
InteractionScores[InteractionScores$Z_lm_L <= -2, ]
InteractionScores_deletion_suppressors_K_lm <-
InteractionScores[InteractionScores$Z_lm_K >= 2, ]
InteractionScores_deletion_enhancers_and_Suppressors_L_lm <-
InteractionScores[InteractionScores$Z_lm_L >= 2 | InteractionScores$Z_lm_L <= -2, ]
InteractionScores_deletion_enhancers_and_Suppressors_K_lm <-
InteractionScores[InteractionScores$Z_lm_K >= 2 | InteractionScores$Z_lm_K <= -2, ]
InteractionScores_deletion_enhancers_L_lm <-
InteractionScores_deletion_enhancers_L_lm[
!is.na(InteractionScores_deletion_enhancers_L_lm$OrfRep), ]
InteractionScores_deletion_enhancers_K_lm <-
InteractionScores_deletion_enhancers_K_lm[
!is.na(InteractionScores_deletion_enhancers_K_lm$OrfRep), ]
InteractionScores_deletion_suppressors_L_lm <-
InteractionScores_deletion_suppressors_L_lm[
!is.na(InteractionScores_deletion_suppressors_L_lm$OrfRep), ]
InteractionScores_deletion_suppressors_K_lm <-
InteractionScores_deletion_suppressors_K_lm[
!is.na(InteractionScores_deletion_suppressors_K_lm$OrfRep), ]
InteractionScores_deletion_enhancers_and_Suppressors_L_lm <-
InteractionScores_deletion_enhancers_and_Suppressors_L_lm[
!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L_lm$OrfRep), ]
InteractionScores_deletion_enhancers_and_Suppressors_K_lm <-
InteractionScores_deletion_enhancers_and_Suppressors_K_lm[
!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K_lm$OrfRep), ]
write.csv(InteractionScores_deletion_enhancers_L_lm,
paste(outDir, "ZScores_Interaction_DeletionEnhancers_L_lm.csv", sep = ""), row.names = FALSE)
write.csv(InteractionScores_deletion_enhancers_K_lm,
paste(outDir, "ZScores_Interaction_DeletionEnhancers_K_lm.csv", sep = ""), row.names = FALSE)
write.csv(InteractionScores_deletion_suppressors_L_lm,
paste(outDir, "ZScores_Interaction_DeletionSuppressors_L_lm.csv", sep = ""), row.names = FALSE)
write.csv(InteractionScores_deletion_suppressors_K_lm,
paste(outDir, "ZScores_Interaction_DeletionSuppressors_K_lm.csv", sep = ""), row.names = FALSE)
write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L_lm,
paste(outDir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv", sep = ""), row.names = FALSE)
write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K_lm,
paste(outDir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv", sep = ""), row.names = FALSE)
write.csv(Labels, file = paste(studyInfo), row.names = FALSE)
# write.table(Labels, file = paste("../Code/StudyInfo.txt"), sep = "\t", row.names = FALSE)
for (i in 1:num_genes) {
Gene_Sel <- unique(InteractionScores$OrfRep)[i]
X_ZCalculations <- X_stats_interaction_ALL[X_stats_interaction_ALL$OrfRep == Gene_Sel, ]
X_Int_Scores <- InteractionScores[InteractionScores$OrfRep == Gene_Sel, ]
p_l[[i]] <- ggplot(X_ZCalculations, aes(Conc_Num_Factor, Delta_L)) +
geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
coord_cartesian(ylim = c(-65, 65)) +
geom_errorbar(aes(ymin = 0 - (2 * WT_sd_l), ymax = 0 + (2 * WT_sd_l)), alpha = 0.3) +
ggtitle(paste(X_ZCalculations$OrfRep[1], X_ZCalculations$Gene[1], sep = " ")) +
annotate("text", x = 1, y = 45, label = paste("ZShift =", round(X_Int_Scores$Z_Shift_L, 2))) +
scale_color_discrete(guide = FALSE) +
# annotate("text", x = 1, y = 35, label = paste("Avg ZScore =", round(X_Int_Scores$Avg_Zscore_L, 2))) +
annotate("text", x = 1, y = 25, label = paste("Z lm Score =", round(X_Int_Scores$Z_lm_L, 2))) +
annotate("text", x = 1, y = -25, label = paste("NG =", X_Int_Scores$NG)) +
annotate("text", x = 1, y = -35, label = paste("DB =", X_Int_Scores$DB)) +
annotate("text", x = 1, y = -45, label = paste("SM =", X_Int_Scores$SM)) +
scale_x_continuous(
name = unique(X$Drug[1]),
breaks = unique(X_ZCalculations$Conc_Num_Factor),
labels = unique(as.character(X_ZCalculations$Conc_Num))) +
scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) +
theme_publication()
p_K[[i]] <- ggplot(X_ZCalculations, aes(Conc_Num_Factor, Delta_K)) +
geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
coord_cartesian(ylim = c(-65, 65)) +
geom_errorbar(aes(ymin = 0 - (2 * WT_sd_K), ymax = 0 + (2 * WT_sd_K)), alpha = 0.3) +
ggtitle(paste(X_ZCalculations$OrfRep[1], X_ZCalculations$Gene[1], sep = " ")) +
annotate("text", x = 1, y = 45, label = paste("ZShift =", round(X_Int_Scores$Z_Shift_K, 2))) +
scale_color_discrete(guide = FALSE) +
# annotate("text", x = 1, y = 35, label = paste("Avg ZScore =", round(X_Int_Scores$Avg_Zscore_K, 2))) +
annotate("text", x = 1, y = 25, label = paste("Z lm Score =", round(X_Int_Scores$Z_lm_K, 2))) +
annotate("text", x = 1, y = -25, label = paste("NG =", X_Int_Scores$NG)) +
annotate("text", x = 1, y = -35, label = paste("DB =", X_Int_Scores$DB)) +
annotate("text", x = 1, y = -45, label = paste("SM =", X_Int_Scores$SM)) +
scale_x_continuous(
name = unique(X$Drug[1]),
breaks = unique(X_ZCalculations$Conc_Num_Factor),
labels = unique(as.character(X_ZCalculations$Conc_Num))) +
scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) +
theme_publication()
p_r[[i]] <- ggplot(X_ZCalculations, aes(Conc_Num_Factor, Delta_r)) +
geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
coord_cartesian(ylim = c(-0.65, 0.65)) +
geom_errorbar(aes(ymin = 0 - (2 * WT_sd_r), ymax = 0 + (2 * WT_sd_r)), alpha = 0.3) +
ggtitle(paste(X_ZCalculations$OrfRep[1], X_ZCalculations$Gene[1], sep = " ")) +
annotate("text", x = 1, y = 0.45, label = paste("ZShift =", round(X_Int_Scores$Z_Shift_r, 2))) +
scale_color_discrete(guide = FALSE) +
# annotate("text", x = 1, y = 0.35, label = paste("Avg ZScore =", round(X_Int_Scores$Avg_Zscore_r, 2))) +
annotate("text", x = 1, y = 0.25, label = paste("Z lm Score =", round(X_Int_Scores$Z_lm_r, 2))) +
annotate("text", x = 1, y = -0.25, label = paste("NG =", X_Int_Scores$NG)) +
annotate("text", x = 1, y = -0.35, label = paste("DB =", X_Int_Scores$DB)) +
annotate("text", x = 1, y = -0.45, label = paste("SM =", X_Int_Scores$SM)) +
scale_x_continuous(
name = unique(X$Drug[1]),
breaks = unique(X_ZCalculations$Conc_Num_Factor),
labels = unique(as.character(X_ZCalculations$Conc_Num))) +
scale_y_continuous(breaks = c(-0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6)) +
theme_publication()
p_AUC[[i]] <- ggplot(X_ZCalculations, aes(Conc_Num_Factor, Delta_AUC)) +
geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
coord_cartesian(ylim = c(-6500, 6500)) +
geom_errorbar(aes(ymin = 0 - (2 * WT_sd_AUC), ymax = 0 + (2 * WT_sd_AUC)), alpha = 0.3) +
ggtitle(paste(X_ZCalculations$OrfRep[1], X_ZCalculations$Gene[1], sep = " ")) +
annotate("text", x = 1, y = 4500, label = paste("ZShift =", round(X_Int_Scores$Z_Shift_AUC, 2))) +
scale_color_discrete(guide = FALSE) +
# annotate("text", x = 1, y = 3500, label = paste("Avg ZScore =", round(X_Int_Scores$Avg_Zscore_AUC, 2))) +
annotate("text", x = 1, y = 2500, label = paste("Z lm Score =", round(X_Int_Scores$Z_lm_AUC, 2))) +
annotate("text", x = 1, y = -2500, label = paste("NG =", X_Int_Scores$NG)) +
annotate("text", x = 1, y = -3500, label = paste("DB =", X_Int_Scores$DB)) +
annotate("text", x = 1, y = -4500, label = paste("SM =", X_Int_Scores$SM)) +
scale_x_continuous(
name = unique(X$Drug[1]),
breaks = unique(X_ZCalculations$Conc_Num_Factor),
labels = unique(as.character(X_ZCalculations$Conc_Num))) +
scale_y_continuous(breaks = c(-6000, -5000, -4000, -3000, -2000, -1000, 0, 1000, 2000, 3000, 4000, 5000, 6000)) +
theme_publication()
if (i == 1) {
X_stats_interaction_ALL_final <- X_ZCalculations
}
if (i > 1) {
X_stats_interaction_ALL_final <- rbind(X_stats_interaction_ALL_final, X_ZCalculations)
}
}
print("Pass Int ggplot loop")
write.csv(X_stats_interaction_ALL_final, paste(outDir, "ZScore_Calculations.csv", sep = ""), row.names = FALSE)
Blank <- ggplot(X2_RF) + geom_blank()
pdf(paste(outDir, "InteractionPlots.pdf", sep = ""), width = 16, height = 16, onefile = TRUE)
X_stats_X2_RF <- ddply(
X2_RF,
c("Conc_Num", "Conc_Num_Factor"),
summarise,
mean_L = mean(l, na.rm = TRUE),
median_L = median(l, na.rm = TRUE),
max_L = max(l, na.rm = TRUE),
min_L = min(l, na.rm = TRUE),
sd_L = sd(l, na.rm = TRUE),
mean_K = mean(K, na.rm = TRUE),
median_K = median(K, na.rm = TRUE),
max_K = max(K, na.rm = TRUE),
min_K = min(K, na.rm = TRUE),
sd_K = sd(K, na.rm = TRUE),
mean_r = mean(r, na.rm = TRUE),
median_r = median(r, na.rm = TRUE),
max_r = max(r, na.rm = TRUE),
min_r = min(r, na.rm = TRUE),
sd_r = sd(r, na.rm = TRUE),
mean_AUC = mean(AUC, na.rm = TRUE),
median_AUC = median(AUC, na.rm = TRUE),
max_AUC = max(AUC, na.rm = TRUE),
min_AUC = min(AUC, na.rm = TRUE),
sd_AUC = sd(AUC, na.rm = TRUE),
NG = sum(NG, na.rm = TRUE),
DB = sum(DB, na.rm = TRUE),
SM = sum(SM, na.rm = TRUE)
)
L_Stats <- ggplot(X2_RF, aes(Conc_Num_Factor, l)) + geom_point(position = "jitter", size = 1) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar", color = "red") +
stat_summary(fun = mean, geom = "point", color = "red") +
scale_x_continuous(
name = unique(X$Drug[1]),
breaks = unique(X2_RF$Conc_Num_Factor),
labels = as.character(unique(X2_RF$Conc_Num))
) +
ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim = c(0, 160)) +
annotate("text", x = -0.25, y = 10, label = "NG") +
annotate("text", x = -0.25, y = 5, label = "DB") +
annotate("text", x = -0.25, y = 0, label = "SM") +
annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 10, label = X_stats_X2_RF$NG) +
annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 5, label = X_stats_X2_RF$DB) +
annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 0, label = X_stats_X2_RF$SM) +
theme_publication()
K_Stats <- ggplot(X2_RF, aes(Conc_Num_Factor, K)) + geom_point(position = "jitter", size = 1) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar", color = "red") +
stat_summary(fun = mean, geom = "point", color = "red") +
scale_x_continuous(
name = unique(X$Drug[1]),
breaks = unique(X2_RF$Conc_Num_Factor),
labels = as.character(unique(X2_RF$Conc_Num))
) +
ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim = c(-20, 160)) +
annotate("text", x = -0.25, y = -5, label = "NG") +
annotate("text", x = -0.25, y = -12.5, label = "DB") +
annotate("text", x = -0.25, y = -20, label = "SM") +
annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = -5, label = X_stats_X2_RF$NG) +
annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = -12.5, label = X_stats_X2_RF$DB) +
annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = -20, label = X_stats_X2_RF$SM) +
theme_publication()
R_Stats <- ggplot(X2_RF, aes(Conc_Num_Factor, r)) + geom_point(position = "jitter", size = 1) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar", color = "red") +
stat_summary(fun = mean, geom = "point", color = "red") +
scale_x_continuous(
name = unique(X$Drug[1]),
breaks = unique(X2_RF$Conc_Num_Factor),
labels = as.character(unique(X2_RF$Conc_Num))
) +
ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim = c(0, 1)) +
annotate("text", x = -0.25, y = .9, label = "NG") +
annotate("text", x = -0.25, y = .8, label = "DB") +
annotate("text", x = -0.25, y = .7, label = "SM") +
annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = .9, label = X_stats_X2_RF$NG) +
annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = .8, label = X_stats_X2_RF$DB) +
annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = .7, label = X_stats_X2_RF$SM) +
theme_publication()
AUC_Stats <- ggplot(X2_RF, aes(Conc_Num_Factor, AUC)) + geom_point(position = "jitter", size = 1) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar", color = "red") +
stat_summary(fun = mean, geom = "point", color = "red") +
scale_x_continuous(
name = unique(X$Drug[1]),
breaks = unique(X2_RF$Conc_Num_Factor),
labels = as.character(unique(X2_RF$Conc_Num))
) +
ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim = c(0, 12500)) +
annotate("text", x = -0.25, y = 11000, label = "NG") +
annotate("text", x = -0.25, y = 10000, label = "DB") +
annotate("text", x = -0.25, y = 9000, label = "SM") +
annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 11000, label = X_stats_X2_RF$NG) +
annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 10000, label = X_stats_X2_RF$DB) +
annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 9000, label = X_stats_X2_RF$SM) +
theme_publication()
L_Stats_Box <- ggplot(X2_RF, aes(as.factor(Conc_Num_Factor), l)) +
geom_boxplot() +
scale_x_discrete(
name = unique(X$Drug[1]),
breaks = unique(X2_RF$Conc_Num_Factor),
labels = as.character(unique(X2_RF$Conc_Num))
) +
ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) +
coord_cartesian(ylim = c(0, 160)) +
theme_publication()
K_Stats_Box <- ggplot(X2_RF, aes(as.factor(Conc_Num_Factor), K)) +
geom_boxplot() +
scale_x_discrete(
name = unique(X$Drug[1]),
breaks = unique(X2_RF$Conc_Num_Factor),
labels = as.character(unique(X2_RF$Conc_Num))
) +
ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) +
coord_cartesian(ylim = c(0, 130)) +
theme_publication()
r_Stats_Box <- ggplot(X2_RF, aes(as.factor(Conc_Num_Factor), r)) +
geom_boxplot() +
scale_x_discrete(
name = unique(X$Drug[1]),
breaks = unique(X2_RF$Conc_Num_Factor),
labels = as.character(unique(X2_RF$Conc_Num))
) +
ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) +
coord_cartesian(ylim = c(0, 1)) +
theme_publication()
AUC_Stats_Box <- ggplot(X2_RF, aes(as.factor(Conc_Num_Factor), AUC)) +
geom_boxplot() +
scale_x_discrete(
name = unique(X$Drug[1]),
breaks = unique(X2_RF$Conc_Num_Factor),
labels = as.character(unique(X2_RF$Conc_Num))
) +
ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) +
coord_cartesian(ylim = c(0, 12500)) +
theme_publication()
grid.arrange(L_Stats, K_Stats, R_Stats, AUC_Stats, ncol = 2, nrow = 2)
grid.arrange(L_Stats_Box, K_Stats_Box, r_Stats_Box, AUC_Stats_Box, ncol = 2, nrow = 2)
# Plot the references
# grid.arrange(p3, p3_K, p3_r, p4, p4_K, p4_r, p5, p5_K, p5_r, p6, p6_K, p6_r, ncol = 3, nrow = 4)
# grid.arrange(p5, p5_K, p5_r, p6, p6_K, p6_r, ncol = 3, nrow = 2)
# Loop for grid arrange 4x3
j <- rep(1, ((num_genes) / 3) - 1)
for (n in 1:length(j)) {
j[n + 1] <- n * 3 + 1
}
# Loop for printing each plot
num <- 0
for (m in 1:(round((num_genes) / 3) - 1)) {
num <- j[m]
grid.arrange(p_l[[num]], p_K[[num]], p_r[[num]], p_AUC[[num]],
p_l[[num + 1]], p_K[[num + 1]], p_r[[num + 1]], p_AUC[[num + 1]],
p_l[[num + 2]], p_K[[num + 2]], p_r[[num + 2]], p_AUC[[num + 2]], ncol = 4, nrow = 3)
# grid.arrange(p_l[[364]], p_K[[364]], p_r[[364]],
# p_l[[365]], p_K[[365]], p_r[[365]], p_l[[366]], p_K[[366]], p_r[[366]], ncol = 3, nrow = 3)
# p1[[num + 3]], p_K[[num + 3]], p_r[[num + 3]], p1[[num + 4]], p_K[[num + 4]], p_r[[num + 4]]
}
if (num_genes != (num + 2)) {
total_num <- num_genes - (num + 2)
if (total_num == 5) {
grid.arrange(p_l[[num + 3]], p_K[[num + 3]], p_r[[num + 3]], p_AUC[[num + 3]],
p_l[[num + 4]], p_K[[num + 4]], p_r[[num + 4]], p_AUC[[num + 4]],
p_l[[num + 5]], p_K[[num + 5]], p_r[[num + 5]], p_AUC[[num + 5]], ncol = 4, nrow = 3)
grid.arrange(p_l[[num + 6]], p_K[[num + 6]], p_r[[num + 6]], p_AUC[[num + 6]],
p_l[[num + 7]], p_K[[num + 7]], p_r[[num + 7]], p_AUC[[num + 7]],
Blank, Blank, Blank, Blank, ncol = 4, nrow = 3)
}
if (total_num == 4) {
grid.arrange(p_l[[num + 3]], p_K[[num + 3]], p_r[[num + 3]], p_AUC[[num + 3]],
p_l[[num + 4]], p_K[[num + 4]], p_r[[num + 4]], p_AUC[[num + 4]],
p_l[[num + 5]], p_K[[num + 5]], p_r[[num + 5]], p_AUC[[num + 5]], ncol = 4, nrow = 3)
grid.arrange(p_l[[num + 6]], p_K[[num + 6]], p_r[[num + 6]], p_AUC[[num + 6]],
Blank, Blank, Blank, Blank, Blank, Blank, Blank, Blank, ncol = 4, nrow = 3)
}
if (total_num == 3) {
grid.arrange(p_l[[num + 3]], p_K[[num + 3]], p_r[[num + 3]], p_AUC[[num + 3]],
p_l[[num + 4]], p_K[[num + 4]], p_r[[num + 4]], p_AUC[[num + 4]],
p_l[[num + 5]], p_K[[num + 5]], p_r[[num + 5]], p_AUC[[num + 5]], ncol = 4, nrow = 3)
# grid.arrange(p_l[[num + 6]], p_K[[num + 6]], p_r[[num + 6]],
# p_l[[num + 7]], p_K[[num + 7]], p_r[[num + 7]], Blank, Blank, Blank, ncol = 3, nrow = 3)
}
if (total_num == 2) {
grid.arrange(p_l[[num + 3]], p_K[[num + 3]], p_r[[num + 3]], p_AUC[[num + 3]],
p_l[[num + 4]], p_K[[num + 4]], p_r[[num + 4]], p_AUC[[num + 4]],
Blank, Blank, Blank, Blank, ncol = 4, nrow = 3)
# grid.arrange(p_l[[num + 6]], p_K[[num + 6]], p_r[[num + 6]],
# p_l[[num + 7]], p_K[[num + 7]], p_r[[num + 7]], Blank, Blank, Blank, ncol = 3, nrow = 3)
}
if (total_num == 1) {
grid.arrange(p_l[[num + 3]], p_K[[num + 3]], p_r[[num + 3]], p_AUC[[num + 3]],
Blank, Blank, Blank, Blank, Blank, Blank, Blank, Blank, ncol = 4, nrow = 3)
# grid.arrange(p_l[[num + 6]], p_K[[num + 6]], p_r[[num + 6]],
# p_l[[num + 7]], p_K[[num + 7]], p_r[[num + 7]], Blank, Blank, Blank, ncol = 3, nrow = 3)
}
}
dev.off()
pdf(paste(outDir, "RF_InteractionPlots.pdf", sep = ""), width = 16, height = 16, onefile = TRUE)
X_stats_X2_RF <- ddply(
X2_RF,
c("Conc_Num", "Conc_Num_Factor"),
summarise,
mean_L = mean(l, na.rm = TRUE),
median_L = median(l, na.rm = TRUE),
max_L = max(l, na.rm = TRUE),
min_L = min(l, na.rm = TRUE),
sd_L = sd(l, na.rm = TRUE),
mean_K = mean(K, na.rm = TRUE),
median_K = median(K, na.rm = TRUE),
max_K = max(K, na.rm = TRUE),
min_K = min(K, na.rm = TRUE),
sd_K = sd(K, na.rm = TRUE),
mean_r = mean(r, na.rm = TRUE),
median_r = median(r, na.rm = TRUE),
max_r = max(r, na.rm = TRUE),
min_r = min(r, na.rm = TRUE),
sd_r = sd(r, na.rm = TRUE),
mean_AUC = mean(AUC, na.rm = TRUE),
median_AUC = median(AUC, na.rm = TRUE),
max_AUC = max(AUC, na.rm = TRUE),
min_AUC = min(AUC, na.rm = TRUE),
sd_AUC = sd(AUC, na.rm = TRUE),
NG = sum(NG, na.rm = TRUE),
DB = sum(DB, na.rm = TRUE),
SM = sum(SM, na.rm = TRUE)
)
L_Stats <- ggplot(X2_RF, aes(Conc_Num_Factor, l)) +
geom_point(position = "jitter", size = 1) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar", color = "red"
) +
stat_summary(fun = mean, geom = "point", color = "red") +
scale_x_continuous(
name = unique(X$Drug[1]),
breaks = unique(X2_RF$Conc_Num_Factor),
labels = as.character(unique(X2_RF$Conc_Num))
) +
ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) +
coord_cartesian(ylim = c(0, 130)) +
annotate("text", x = -0.25, y = 10, label = "NG") +
annotate("text", x = -0.25, y = 5, label = "DB") +
annotate("text", x = -0.25, y = 0, label = "SM") +
annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 10, label = X_stats_X2_RF$NG) +
annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 5, label = X_stats_X2_RF$DB) +
annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 0, label = X_stats_X2_RF$SM) +
theme_publication()
K_Stats <- ggplot(X2_RF, aes(Conc_Num_Factor, K)) +
geom_point(position = "jitter", size = 1) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar", color = "red"
) +
stat_summary(fun = mean, geom = "point", color = "red") +
scale_x_continuous(
name = unique(X$Drug[1]),
breaks = unique(X2_RF$Conc_Num_Factor),
labels = as.character(unique(X2_RF$Conc_Num))
) +
ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) +
coord_cartesian(ylim = c(-20, 160)) +
annotate("text", x = -0.25, y = -5, label = "NG") +
annotate("text", x = -0.25, y = -12.5, label = "DB") +
annotate("text", x = -0.25, y = -20, label = "SM") +
annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = -5, label = X_stats_X2_RF$NG) +
annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = -12.5, label = X_stats_X2_RF$DB) +
annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = -20, label = X_stats_X2_RF$SM) +
theme_publication()
R_Stats <- ggplot(X2_RF, aes(Conc_Num_Factor, r)) +
geom_point(position = "jitter", size = 1) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar", color = "red"
) +
stat_summary(fun = mean, geom = "point", color = "red") +
scale_x_continuous(
name = unique(X$Drug[1]),
breaks = unique(X2_RF$Conc_Num_Factor),
labels = as.character(unique(X2_RF$Conc_Num))
) +
ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) +
coord_cartesian(ylim = c(0, 1)) +
annotate("text", x = -0.25, y = .9, label = "NG") +
annotate("text", x = -0.25, y = .8, label = "DB") +
annotate("text", x = -0.25, y = .7, label = "SM") +
annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = .9, label = X_stats_X2_RF$NG) +
annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = .8, label = X_stats_X2_RF$DB) +
annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = .7, label = X_stats_X2_RF$SM) +
theme_publication()
AUC_Stats <- ggplot(X2_RF, aes(Conc_Num_Factor, AUC)) +
geom_point(position = "jitter", size = 1) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar", color = "red"
) +
stat_summary(fun = mean, geom = "point", color = "red") +
scale_x_continuous(
name = unique(X$Drug[1]),
breaks = unique(X2_RF$Conc_Num_Factor),
labels = as.character(unique(X2_RF$Conc_Num))
) +
ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) +
coord_cartesian(ylim = c(0, 12500)) +
annotate("text", x = -0.25, y = 11000, label = "NG") +
annotate("text", x = -0.25, y = 10000, label = "DB") +
annotate("text", x = -0.25, y = 9000, label = "SM") +
annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 11000, label = X_stats_X2_RF$NG) +
annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 10000, label = X_stats_X2_RF$DB) +
annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 9000, label = X_stats_X2_RF$SM) +
theme_publication()
L_Stats_Box <- ggplot(X2_RF, aes(as.factor(Conc_Num_Factor), l)) +
geom_boxplot() +
scale_x_discrete(
name = unique(X$Drug[1]),
breaks = unique(X2_RF$Conc_Num_Factor),
labels = as.character(unique(X2_RF$Conc_Num))
) +
ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) +
coord_cartesian(ylim = c(0, 130)) +
theme_publication()
K_Stats_Box <- ggplot(X2_RF, aes(as.factor(Conc_Num_Factor), K)) +
geom_boxplot() +
scale_x_discrete(
name = unique(X$Drug[1]),
breaks = unique(X2_RF$Conc_Num_Factor),
labels = as.character(unique(X2_RF$Conc_Num))
) +
ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) +
coord_cartesian(ylim = c(0, 160)) +
theme_publication()
r_Stats_Box <- ggplot(X2_RF, aes(as.factor(Conc_Num_Factor), r)) +
geom_boxplot() +
scale_x_discrete(
name = unique(X$Drug[1]),
breaks = unique(X2_RF$Conc_Num_Factor),
labels = as.character(unique(X2_RF$Conc_Num))
) +
ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) +
coord_cartesian(ylim = c(0, 1)) +
theme_publication()
AUC_Stats_Box <- ggplot(X2_RF, aes(as.factor(Conc_Num_Factor), AUC)) +
geom_boxplot() +
scale_x_discrete(
name = unique(X$Drug[1]),
breaks = unique(X2_RF$Conc_Num_Factor),
labels = as.character(unique(X2_RF$Conc_Num))
) +
ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim = c(12000, 0)) +
theme_publication()
grid.arrange(L_Stats, K_Stats, R_Stats, AUC_Stats, ncol = 2, nrow = 2)
grid.arrange(L_Stats_Box, K_Stats_Box, r_Stats_Box, AUC_Stats_Box, ncol = 2, nrow = 2)
# Plot the references
# grid.arrange(p3, p3_K, p3_r, p4, p4_K, p4_r, p5, p5_K, p5_r, p6, p6_K, p6_r, ncol = 3, nrow = 4)
# grid.arrange(p5, p5_K, p5_r, p6, p6_K, p6_r, ncol = 3, nrow = 2)
# Loop for grid arrange 4x3
j <- rep(1, ((num_genes_RF) / 3) - 1)
for (n in 1:length(j)) {
j[n + 1] <- n * 3 + 1
}
# Loop for printing each plot
num <- 0
for (m in 1:(round((num_genes_RF) / 3) - 1)) {
num <- j[m]
grid.arrange(p_rf_l[[num]], p_rf_K[[num]], p_rf_r[[num]], p_rf_AUC[[num]],
p_rf_l[[num + 1]], p_rf_K[[num + 1]], p_rf_r[[num + 1]], p_rf_AUC[[num + 1]],
p_rf_l[[num + 2]], p_rf_K[[num + 2]], p_rf_r[[num + 2]], p_rf_AUC[[num + 2]], ncol = 4, nrow = 3)
# grid.arrange(p_rf_l[[364]], p_rf_K[[364]], p_rf_r[[364]],
# p_rf_l[[365]], p_rf_K[[365]], p_rf_r[[365]], p_rf_l[[366]], p_rf_K[[366]], p_rf_r[[366]], ncol = 3, nrow = 3)
# p1[[num + 3]], p_rf_K[[num + 3]], p_rf_r[[num + 3]], p1[[num + 4]], p_rf_K[[num + 4]], p_rf_r[[num + 4]]
}
if (num_genes_RF != (num + 2)) {
total_num <- num_genes_RF - (num + 2)
if (total_num == 5) {
grid.arrange(p_rf_l[[num + 3]], p_rf_K[[num + 3]], p_rf_r[[num + 3]], p_rf_AUC[[num + 3]],
p_rf_l[[num + 4]], p_rf_K[[num + 4]], p_rf_r[[num + 4]], p_rf_AUC[[num + 4]],
p_rf_l[[num + 5]], p_rf_K[[num + 5]], p_rf_r[[num + 5]], p_rf_AUC[[num + 5]], ncol = 4, nrow = 3
)
grid.arrange(p_rf_l[[num + 6]], p_rf_K[[num + 6]], p_rf_r[[num + 6]], p_rf_AUC[[num + 6]],
p_rf_l[[num + 7]], p_rf_K[[num + 7]], p_rf_r[[num + 7]], p_rf_AUC[[num + 7]],
Blank, Blank, Blank, Blank, ncol = 4, nrow = 3
)
}
if (total_num == 4) {
grid.arrange(p_rf_l[[num + 3]], p_rf_K[[num + 3]], p_rf_r[[num + 3]], p_rf_AUC[[num + 3]],
p_rf_l[[num + 4]], p_rf_K[[num + 4]], p_rf_r[[num + 4]], p_rf_AUC[[num + 4]],
p_rf_l[[num + 5]], p_rf_K[[num + 5]], p_rf_r[[num + 5]], p_rf_AUC[[num + 5]], ncol = 4, nrow = 3
)
grid.arrange(p_rf_l[[num + 6]], p_rf_K[[num + 6]], p_rf_r[[num + 6]], p_rf_AUC[[num + 6]],
Blank, Blank, Blank, Blank, Blank, Blank, Blank, Blank, ncol = 4, nrow = 3
)
}
if (total_num == 3) {
grid.arrange(p_rf_l[[num + 3]], p_rf_K[[num + 3]], p_rf_r[[num + 3]], p_rf_AUC[[num + 3]],
p_rf_l[[num + 4]], p_rf_K[[num + 4]], p_rf_r[[num + 4]], p_rf_AUC[[num + 4]],
p_rf_l[[num + 5]], p_rf_K[[num + 5]], p_rf_r[[num + 5]], p_rf_AUC[[num + 5]], ncol = 4, nrow = 3
)
# grid.arrange(p_rf_l[[num + 6]], p_rf_K[[num + 6]], p_rf_r[[num + 6]],
# p_rf_l[[num + 7]], p_rf_K[[num + 7]], p_rf_r[[num + 7]], Blank, Blank, Blank, ncol = 3, nrow = 3)
}
if (total_num == 2) {
grid.arrange(p_rf_l[[num + 3]], p_rf_K[[num + 3]], p_rf_r[[num + 3]], p_rf_AUC[[num + 3]],
p_rf_l[[num + 4]], p_rf_K[[num + 4]], p_rf_r[[num + 4]], p_rf_AUC[[num + 4]],
Blank, Blank, Blank, Blank, ncol = 4, nrow = 3
)
# grid.arrange(p_rf_l[[num + 6]], p_rf_K[[num + 6]], p_rf_r[[num + 6]],
# p_rf_l[[num + 7]], p_rf_K[[num + 7]], p_rf_r[[num + 7]], Blank, Blank, Blank, ncol = 3, nrow = 3)
}
if (total_num == 1) {
grid.arrange(p_rf_l[[num + 3]], p_rf_K[[num + 3]], p_rf_r[[num + 3]], p_rf_AUC[[num + 3]],
Blank, Blank, Blank, Blank, Blank, Blank, Blank, Blank, ncol = 4, nrow = 3
)
# grid.arrange(p_rf_l[[num + 6]], p_rf_K[[num + 6]], p_rf_r[[num + 6]],
# p_rf_l[[num + 7]], p_rf_K[[num + 7]], p_rf_r[[num + 7]], Blank, Blank, Blank, ncol = 3, nrow = 3)
}
}
dev.off()
# Print rank plots for L and K gene interactions
InteractionScores_AdjustMissing <- InteractionScores
InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_L), ]$Avg_Zscore_L <- 0.001
InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_K), ]$Avg_Zscore_K <- 0.001
InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_r), ]$Avg_Zscore_r <- 0.001
InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_AUC), ]$Avg_Zscore_AUC <- 0.001
InteractionScores_AdjustMissing$L_Rank <- NA
InteractionScores_AdjustMissing$K_Rank <- NA
InteractionScores_AdjustMissing$r_Rank <- NA
InteractionScores_AdjustMissing$AUC_Rank <- NA
InteractionScores_AdjustMissing$L_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_L)
InteractionScores_AdjustMissing$K_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_K)
InteractionScores_AdjustMissing$r_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_r)
InteractionScores_AdjustMissing$AUC_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_AUC)
InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_L), ]$Z_lm_L <- 0.001
InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_K), ]$Z_lm_K <- 0.001
InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_r), ]$Z_lm_r <- 0.001
InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_AUC), ]$Z_lm_AUC <- 0.001
InteractionScores_AdjustMissing$L_Rank_lm <- NA
InteractionScores_AdjustMissing$K_Rank_lm <- NA
InteractionScores_AdjustMissing$r_Rank_lm <- NA
InteractionScores_AdjustMissing$AUC_Rank_lm <- NA
InteractionScores_AdjustMissing$L_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_L)
InteractionScores_AdjustMissing$K_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_K)
InteractionScores_AdjustMissing$r_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_r)
InteractionScores_AdjustMissing$AUC_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_AUC)
# Rank plots
Rank_L_1SD <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank, Avg_Zscore_L)) +
ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Enhancers =",
dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 1, ])[1])) +
annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Suppressors =",
dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -1, ])[1])) +
theme_publication()
Rank_L_2SD <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank, Avg_Zscore_L)) +
ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Enhancers =",
dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 2, ])[1])) +
annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Suppressors =",
dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -2, ])[1])) +
theme_publication()
Rank_L_3SD <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank, Avg_Zscore_L)) +
ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Enhancers =",
dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 3, ])[1])) +
annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Suppressors =",
dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -3, ])[1])) +
theme_publication()
Rank_K_1SD <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank, Avg_Zscore_K)) +
ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Enhancers =",
dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -1, ])[1])) +
annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Suppressors =",
dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 1, ])[1])) +
theme_publication()
Rank_K_2SD <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank, Avg_Zscore_K)) +
ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Enhancers =",
dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -2, ])[1])) +
annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Suppressors =",
dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 2, ])[1])) +
theme_publication()
Rank_K_3SD <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank, Avg_Zscore_K)) +
ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Enhancers =",
dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -3, ])[1])) +
annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Suppressors =",
dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 3, ])[1])) +
theme_publication()
Rank_L_1SD_notext <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank, Avg_Zscore_L)) +
ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
Rank_L_2SD_notext <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank, Avg_Zscore_L)) +
ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
Rank_L_3SD_notext <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank, Avg_Zscore_L)) +
ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
Rank_K_1SD_notext <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank, Avg_Zscore_K)) +
ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
Rank_K_2SD_notext <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank, Avg_Zscore_K)) +
ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
Rank_K_3SD_notext <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank, Avg_Zscore_K)) +
ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
pdf(paste(outDir, "RankPlots.pdf", sep = ""), width = 18, height = 12, onefile = TRUE)
grid.arrange(
Rank_L_1SD,
Rank_L_2SD,
Rank_L_3SD,
Rank_K_1SD,
Rank_K_2SD,
Rank_K_3SD,
ncol = 3,
nrow = 2
)
grid.arrange(
Rank_L_1SD_notext,
Rank_L_2SD_notext,
Rank_L_3SD_notext,
Rank_K_1SD_notext,
Rank_K_2SD_notext,
Rank_K_3SD_notext,
ncol = 3,
nrow = 2
)
dev.off()
Rank_L_1SD_lm <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank_lm, Z_lm_L)) +
ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Enhancers =",
dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 1, ])[1])) +
annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Suppressors =",
dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -1, ])[1])) +
theme_publication()
Rank_L_2SD_lm <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank_lm, Z_lm_L)) +
ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Enhancers =",
dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 2, ])[1])) +
annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Suppressors =",
dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -2, ])[1])) +
theme_publication()
Rank_L_3SD_lm <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank_lm, Z_lm_L)) +
ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Enhancers =",
dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 3, ])[1])) +
annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Suppressors =",
dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -3, ])[1])) +
theme_publication()
Rank_K_1SD_lm <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank_lm, Z_lm_K)) +
ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Enhancers =",
dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -1, ])[1])) +
annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Suppressors =",
dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 1, ])[1])) +
theme_publication()
Rank_K_2SD_lm <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank_lm, Z_lm_K)) +
ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Enhancers =",
dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -2, ])[1])) +
annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Suppressors =",
dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 2, ])[1])) +
theme_publication()
Rank_K_3SD_lm <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank_lm, Z_lm_K)) +
ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Enhancers =",
dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -3, ])[1])) +
annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Suppressors =",
dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 3, ])[1])) +
theme_publication()
Rank_L_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank_lm, Z_lm_L)) +
ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
Rank_L_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank_lm, Z_lm_L)) +
ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
Rank_L_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank_lm, Z_lm_L)) +
ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
Rank_K_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank_lm, Z_lm_K)) +
ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
Rank_K_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank_lm, Z_lm_K)) +
ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
Rank_K_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank_lm, Z_lm_K)) +
ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
pdf(paste(outDir, "RankPlots_lm.pdf", sep = ""), width = 18, height = 12, onefile = TRUE)
grid.arrange(
Rank_L_1SD_lm,
Rank_L_2SD_lm,
Rank_L_3SD_lm,
Rank_K_1SD_lm,
Rank_K_2SD_lm,
Rank_K_3SD_lm,
ncol = 3, nrow = 2
)
grid.arrange(
Rank_L_1SD_notext_lm,
Rank_L_2SD_notext_lm,
Rank_L_3SD_notext_lm,
Rank_K_1SD_notext_lm,
Rank_K_2SD_notext_lm,
Rank_K_3SD_notext_lm,
ncol = 3, nrow = 2
)
dev.off()
X_NArm <- InteractionScores[!is.na(InteractionScores$Z_lm_L) | !is.na(InteractionScores$Avg_Zscore_L), ]
# Find overlaps
X_NArm$Overlap <- "No Effect"
try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L >= 2, ]$Overlap <- "Deletion Enhancer Both")
try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L <= -2, ]$Overlap <- "Deletion Suppressor Both")
try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= 2, ]$Overlap <- "Deletion Enhancer lm only")
try(X_NArm[X_NArm$Z_lm_L <= 2 & X_NArm$Avg_Zscore_L >= 2, ]$Overlap <- "Deletion Enhancer Avg Zscore only")
try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= -2, ]$Overlap <- "Deletion Suppressor lm only")
try(X_NArm[X_NArm$Z_lm_L >= -2 & X_NArm$Avg_Zscore_L <= -2, ]$Overlap <- "Deletion Suppressor Avg Zscore only")
try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= -2, ]$Overlap <- "Deletion Enhancer lm, Deletion Suppressor Avg Z score")
try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= 2, ]$Overlap <- "Deletion Suppressor lm, Deletion Enhancer Avg Z score")
# Get the linear model info and the r-squared value for all CPPs in results 1 vs results 2
get_lm_L <- lm(X_NArm$Z_lm_L ~ X_NArm$Avg_Zscore_L)
L_lm <- summary(get_lm_L)
get_lm_K <- lm(X_NArm$Z_lm_K ~ X_NArm$Avg_Zscore_K)
K_lm <- summary(get_lm_K)
get_lm_r <- lm(X_NArm$Z_lm_r ~ X_NArm$Avg_Zscore_r)
r_lm <- summary(get_lm_r)
get_lm_AUC <- lm(X_NArm$Z_lm_AUC ~ X_NArm$Avg_Zscore_AUC)
AUC_lm <- summary(get_lm_AUC)
pdf(paste(outDir, "Avg_Zscore_vs_lm_NA_rm.pdf", sep = ""), width = 16, height = 12, onefile = TRUE)
print(ggplot(X_NArm, aes(Avg_Zscore_L, Z_lm_L)) +
geom_point(aes(color = Overlap), shape = 3) +
geom_smooth(method = "lm", color = 1) +
ggtitle("Avg Zscore vs lm L") +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm$r.squared, 2))) + theme_publication_legend_right())
print(ggplot(X_NArm, aes(Avg_Zscore_K, Z_lm_K)) +
geom_point(aes(color = Overlap), shape = 3) +
geom_smooth(method = "lm", color = 1) +
ggtitle("Avg Zscore vs lm K") +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(K_lm$r.squared, 2))) + theme_publication_legend_right())
print(ggplot(X_NArm, aes(Avg_Zscore_r, Z_lm_r)) +
geom_point(aes(color = Overlap), shape = 3) +
geom_smooth(method = "lm", color = 1) +
ggtitle("Avg Zscore vs lm r") +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(r_lm$r.squared, 2))) + theme_publication_legend_right())
print(ggplot(X_NArm, aes(Avg_Zscore_AUC, Z_lm_AUC)) +
geom_point(aes(color = Overlap), shape = 3) +
geom_smooth(method = "lm", color = 1) +
ggtitle("Avg Zscore vs lm AUC") +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(AUC_lm$r.squared, 2))) + theme_publication_legend_right())
dev.off()
lm_v_Zscore_L <- ggplot(X_NArm, aes(Avg_Zscore_L, Z_lm_L, ORF = OrfRep, Gene = Gene, NG = NG, SM = SM, DB = DB)) +
geom_point(aes(color = Overlap), shape = 3) +
geom_smooth(method = "lm", color = 1) + ggtitle("Avg Zscore vs lm L") +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm$r.squared, 2))) + theme_publication_legend_right()
pgg <- ggplotly(lm_v_Zscore_L)
plotly_path <- paste(outDir, "Avg_Zscore_vs_lm_NA_rm.html", sep = "")
saveWidget(pgg, file = plotly_path, selfcontained = TRUE)
X_NArm$L_Rank <- rank(X_NArm$Avg_Zscore_L)
X_NArm$K_Rank <- rank(X_NArm$Avg_Zscore_K)
X_NArm$r_Rank <- rank(X_NArm$Avg_Zscore_r)
X_NArm$AUC_Rank <- rank(X_NArm$Avg_Zscore_AUC)
X_NArm$L_Rank_lm <- rank(X_NArm$Z_lm_L)
X_NArm$K_Rank_lm <- rank(X_NArm$Z_lm_K)
X_NArm$r_Rank_lm <- rank(X_NArm$Z_lm_r)
X_NArm$AUC_Rank_lm <- rank(X_NArm$Z_lm_AUC)
# Get the linear model info and the r-squared value for all CPPs in results 1 vs results 2
get_lm_L2 <- lm(X_NArm$L_Rank_lm ~ X_NArm$L_Rank)
L_lm2 <- summary(get_lm_L2)
get_lm_K2 <- lm(X_NArm$K_Rank_lm ~ X_NArm$K_Rank)
K_lm2 <- summary(get_lm_K2)
get_lm_r2 <- lm(X_NArm$r_Rank_lm ~ X_NArm$r_Rank)
r_lm2 <- summary(get_lm_r2)
get_lm_AUC2 <- lm(X_NArm$AUC_Rank_lm ~ X_NArm$AUC_Rank)
AUC_lm2 <- summary(get_lm_AUC2)
num_genes_NArm2 <- (dim(X_NArm)[1]) / 2
pdf(paste(outDir, "Avg_Zscore_vs_lm_ranked_NA_rm.pdf", sep = ""), width = 16, height = 12, onefile = TRUE)
print(
ggplot(X_NArm, aes(L_Rank, L_Rank_lm)) +
geom_point(aes(color = Overlap), shape = 3) +
geom_smooth(method = "lm", color = 1) +
ggtitle("Rank Avg Zscore vs lm L") +
annotate("text", x = num_genes_NArm2, y = num_genes_NArm2, label = paste("R-squared = ", round(L_lm2$r.squared, 2))) +
theme_publication_legend_right()
)
print(
ggplot(X_NArm, aes(K_Rank, K_Rank_lm)) +
geom_point(aes(color = Overlap), shape = 3) +
geom_smooth(method = "lm", color = 1) +
ggtitle("Rank Avg Zscore vs lm K") +
annotate("text", x = num_genes_NArm2, y = num_genes_NArm2, label = paste("R-squared = ", round(K_lm2$r.squared, 2))) +
theme_publication_legend_right()
)
print(
ggplot(X_NArm, aes(r_Rank, r_Rank_lm)) +
geom_point(aes(color = Overlap), shape = 3) +
geom_smooth(method = "lm", color = 1) +
ggtitle("Rank Avg Zscore vs lm r") +
annotate("text", x = num_genes_NArm2, y = num_genes_NArm2, label = paste("R-squared = ", round(r_lm2$r.squared, 2))) +
theme_publication_legend_right()
)
print(
ggplot(X_NArm, aes(AUC_Rank, AUC_Rank_lm)) +
geom_point(aes(color = Overlap), shape = 3) +
geom_smooth(method = "lm", color = 1) +
ggtitle("Rank of Avg Zscore vs lm AUC") +
annotate("text", x = num_genes_NArm2, y = num_genes_NArm2, label = paste("R-squared = ", round(AUC_lm2$r.squared, 2))) +
theme_publication_legend_right()
)
dev.off()
Rank_L_1SD <- ggplot(X_NArm, aes(L_Rank, Avg_Zscore_L)) +
ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Avg_Zscore_L >= 1, ])[1])) +
annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Avg_Zscore_L <= -1, ])[1])) +
theme_publication()
Rank_L_2SD <- ggplot(X_NArm, aes(L_Rank, Avg_Zscore_L)) +
ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Avg_Zscore_L >= 2, ])[1])) +
annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Avg_Zscore_L <= -2, ])[1])) +
theme_publication()
Rank_L_3SD <- ggplot(X_NArm, aes(L_Rank, Avg_Zscore_L)) +
ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Avg_Zscore_L >= 3, ])[1])) +
annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Avg_Zscore_L <= -3, ])[1])) +
theme_publication()
Rank_K_1SD <- ggplot(X_NArm, aes(K_Rank, Avg_Zscore_K)) +
ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Avg_Zscore_K <= -1, ])[1])) +
annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Avg_Zscore_K >= 1, ])[1])) +
theme_publication()
Rank_K_2SD <- ggplot(X_NArm, aes(K_Rank, Avg_Zscore_K)) +
ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Avg_Zscore_K <= -2, ])[1])) +
annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Avg_Zscore_K >= 2, ])[1])) +
theme_publication()
Rank_K_3SD <- ggplot(X_NArm, aes(K_Rank, Avg_Zscore_K)) +
ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Avg_Zscore_K <= -3, ])[1])) +
annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Avg_Zscore_K >= 3, ])[1])) +
theme_publication()
Rank_L_1SD_notext <- ggplot(X_NArm, aes(L_Rank, Avg_Zscore_L)) +
ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
Rank_L_2SD_notext <- ggplot(X_NArm, aes(L_Rank, Avg_Zscore_L)) +
ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
Rank_L_3SD_notext <- ggplot(X_NArm, aes(L_Rank, Avg_Zscore_L)) +
ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
Rank_K_1SD_notext <- ggplot(X_NArm, aes(K_Rank, Avg_Zscore_K)) +
ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
Rank_K_2SD_notext <- ggplot(X_NArm, aes(K_Rank, Avg_Zscore_K)) +
ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
Rank_K_3SD_notext <- ggplot(X_NArm, aes(K_Rank, Avg_Zscore_K)) +
ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
pdf(paste(outDir, "RankPlots_naRM.pdf", sep = ""), width = 18, height = 12, onefile = TRUE)
grid.arrange(
Rank_L_1SD,
Rank_L_2SD,
Rank_L_3SD,
Rank_K_1SD,
Rank_K_2SD,
Rank_K_3SD,
ncol = 3, nrow = 2
)
grid.arrange(
Rank_L_1SD_notext,
Rank_L_2SD_notext,
Rank_L_3SD_notext,
Rank_K_1SD_notext,
Rank_K_2SD_notext,
Rank_K_3SD_notext,
ncol = 3, nrow = 2
)
dev.off()
Rank_L_1SD_lm <- ggplot(X_NArm, aes(L_Rank_lm, Z_lm_L)) +
ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Z_lm_L >= 1, ])[1])) +
annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Z_lm_L <= -1, ])[1])) +
theme_publication()
Rank_L_2SD_lm <- ggplot(X_NArm, aes(L_Rank_lm, Z_lm_L)) +
ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Z_lm_L >= 2, ])[1])) +
annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Z_lm_L <= -2, ])[1])) +
theme_publication()
Rank_L_3SD_lm <- ggplot(X_NArm, aes(L_Rank_lm, Z_lm_L)) +
ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Z_lm_L >= 3, ])[1])) +
annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Z_lm_L <= -3, ])[1])) +
theme_publication()
Rank_K_1SD_lm <- ggplot(X_NArm, aes(K_Rank_lm, Z_lm_K)) +
ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Z_lm_K <= -1, ])[1])) +
annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Z_lm_K >= 1, ])[1])) +
theme_publication()
Rank_K_2SD_lm <- ggplot(X_NArm, aes(K_Rank_lm, Z_lm_K)) +
ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Z_lm_K <= -2, ])[1])) +
annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Z_lm_K >= 2, ])[1])) +
theme_publication()
Rank_K_3SD_lm <- ggplot(X_NArm, aes(K_Rank_lm, Z_lm_K)) +
ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Z_lm_K <= -3, ])[1])) +
annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Z_lm_K >= 3, ])[1])) +
theme_publication()
Rank_L_1SD_notext_lm <- ggplot(X_NArm, aes(L_Rank_lm, Z_lm_L)) +
ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
Rank_L_2SD_notext_lm <- ggplot(X_NArm, aes(L_Rank_lm, Z_lm_L)) +
ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
Rank_L_3SD_notext_lm <- ggplot(X_NArm, aes(L_Rank_lm, Z_lm_L)) +
ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
Rank_K_1SD_notext_lm <- ggplot(X_NArm, aes(K_Rank_lm, Z_lm_K)) +
ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
Rank_K_2SD_notext_lm <- ggplot(X_NArm, aes(K_Rank_lm, Z_lm_K)) +
ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
Rank_K_3SD_notext_lm <- ggplot(X_NArm, aes(K_Rank_lm, Z_lm_K)) +
ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
pdf(paste(outDir, "RankPlots_lm_naRM.pdf", sep = ""), width = 18, height = 12, onefile = TRUE)
grid.arrange(
Rank_L_1SD_lm,
Rank_L_2SD_lm,
Rank_L_3SD_lm,
Rank_K_1SD_lm,
Rank_K_2SD_lm,
Rank_K_3SD_lm,
ncol = 3, nrow = 2
)
grid.arrange(
Rank_L_1SD_notext_lm,
Rank_L_2SD_notext_lm,
Rank_L_3SD_notext_lm,
Rank_K_1SD_notext_lm,
Rank_K_2SD_notext_lm,
Rank_K_3SD_notext_lm,
ncol = 3, nrow = 2
)
dev.off()
}
# Get the linear model info and the r-squared value for all CPPs in results 1 vs results 2
get_lm_1 <- lm(X_NArm$Z_lm_K ~ X_NArm$Z_lm_L)
L_lm_1 <- summary(get_lm_1)
get_lm_2 <- lm(X_NArm$Z_lm_r ~ X_NArm$Z_lm_L)
L_lm_2 <- summary(get_lm_2)
get_lm_3 <- lm(X_NArm$Z_lm_AUC ~ X_NArm$Z_lm_L)
L_lm_3 <- summary(get_lm_3)
get_lm_4 <- lm(X_NArm$Z_lm_r ~ X_NArm$Z_lm_K)
L_lm_4 <- summary(get_lm_4)
get_lm_5 <- lm(X_NArm$Z_lm_AUC ~ X_NArm$Z_lm_K)
L_lm_5 <- summary(get_lm_5)
get_lm_6 <- lm(X_NArm$Z_lm_AUC ~ X_NArm$Z_lm_r)
L_lm_6 <- summary(get_lm_6)
pdf(file = paste(outDir, "Correlation_CPPs.pdf", sep = ""), width = 10, height = 7, onefile = TRUE)
ggplot(X_NArm, aes(Z_lm_L, Z_lm_K)) +
geom_point(shape = 3, color = "gray70") +
geom_smooth(method = "lm", color = "tomato3") +
ggtitle("Interaction L vs. Interaction K") +
xlab("z-score L") +
ylab("z-score K") +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_1$r.squared, 3))) +
theme_publication_legend_right() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(size = 16),
axis.title.x = element_text(size = 18),
axis.text.y = element_text(size = 16),
axis.title.y = element_text(size = 18)
)
ggplot(X_NArm, aes(Z_lm_L, Z_lm_r)) +
geom_point(shape = 3, color = "gray70") +
geom_smooth(method = "lm", color = "tomato3") +
ggtitle("Interaction L vs. Interaction r") +
xlab("z-score L") +
ylab("z-score r") +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_2$r.squared, 3))) +
theme_publication_legend_right() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(size = 16),
axis.title.x = element_text(size = 18),
axis.text.y = element_text(size = 16),
axis.title.y = element_text(size = 18)
)
ggplot(X_NArm, aes(Z_lm_L, Z_lm_AUC)) +
geom_point(shape = 3, color = "gray70") +
geom_smooth(method = "lm", color = "tomato3") +
ggtitle("Interaction L vs. Interaction AUC") +
xlab("z-score L") +
ylab("z-score AUC") +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_3$r.squared, 3))) +
theme_publication_legend_right() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(size = 16),
axis.title.x = element_text(size = 18),
axis.text.y = element_text(size = 16),
axis.title.y = element_text(size = 18)
)
ggplot(X_NArm, aes(Z_lm_K, Z_lm_r)) +
geom_point(shape = 3, color = "gray70") +
geom_smooth(method = "lm", color = "tomato3") +
ggtitle("Interaction K vs. Interaction r") +
xlab("z-score K") +
ylab("z-score r") +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_4$r.squared, 3))) +
theme_publication_legend_right() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(size = 16),
axis.title.x = element_text(size = 18),
axis.text.y = element_text(size = 16),
axis.title.y = element_text(size = 18)
)
ggplot(X_NArm, aes(Z_lm_K, Z_lm_AUC)) +
geom_point(shape = 3, color = "gray70") +
geom_smooth(method = "lm", color = "tomato3") +
ggtitle("Interaction K vs. Interaction AUC") +
xlab("z-score K") +
ylab("z-score AUC") +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_5$r.squared, 3))) +
theme_publication_legend_right() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(size = 16),
axis.title.x = element_text(size = 18),
axis.text.y = element_text(size = 16),
axis.title.y = element_text(size = 18)
)
ggplot(X_NArm, aes(Z_lm_r, Z_lm_AUC)) +
geom_point(shape = 3, color = "gray70") +
geom_smooth(method = "lm", color = "tomato3") +
ggtitle("Interaction r vs. Interaction AUC") +
xlab("z-score r") +
ylab("z-score AUC") +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_6$r.squared, 3))) +
theme_publication_legend_right() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(size = 16),
axis.title.x = element_text(size = 18),
axis.text.y = element_text(size = 16),
axis.title.y = element_text(size = 18))
InteractionScores_RF2 <- InteractionScores_RF[!is.na(InteractionScores_RF$Z_lm_L), ]
ggplot(X_NArm, aes(Z_lm_L, Z_lm_K)) +
geom_point(shape = 3, color = "gray70") +
geom_point(data = InteractionScores_RF2, aes(Z_lm_L, Z_lm_K), color = "cyan") +
ggtitle("Interaction L vs. Interaction K") +
xlab("z-score L") +
ylab("z-score K") +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_1$r.squared, 3))) +
theme_publication_legend_right() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(size = 16),
axis.title.x = element_text(size = 18),
axis.text.y = element_text(size = 16),
axis.title.y = element_text(size = 18)
)
ggplot(X_NArm, aes(Z_lm_L, Z_lm_r)) +
geom_point(shape = 3, color = "gray70") +
geom_point(data = InteractionScores_RF2, aes(Z_lm_L, Z_lm_r), color = "cyan") +
ggtitle("Interaction L vs. Interaction r") +
xlab("z-score L") +
ylab("z-score r") +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_2$r.squared, 3))) +
theme_publication_legend_right() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(size = 16),
axis.title.x = element_text(size = 18),
axis.text.y = element_text(size = 16),
axis.title.y = element_text(size = 18)
)
ggplot(X_NArm, aes(Z_lm_L, Z_lm_AUC)) +
geom_point(shape = 3, color = "gray70") +
geom_point(data = InteractionScores_RF2, aes(Z_lm_L, Z_lm_AUC), color = "cyan") +
ggtitle("Interaction L vs. Interaction AUC") +
xlab("z-score L") +
ylab("z-score AUC") +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_3$r.squared, 3))) +
theme_publication_legend_right() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(size = 16),
axis.title.x = element_text(size = 18),
axis.text.y = element_text(size = 16),
axis.title.y = element_text(size = 18)
)
ggplot(X_NArm, aes(Z_lm_K, Z_lm_r)) +
geom_point(shape = 3, color = "gray70") +
geom_point(data = InteractionScores_RF2, aes(Z_lm_K, Z_lm_r), color = "cyan") +
ggtitle("Interaction K vs. Interaction r") +
xlab("z-score K") +
ylab("z-score r") +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_4$r.squared, 3))) +
theme_publication_legend_right() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(size = 16),
axis.title.x = element_text(size = 18),
axis.text.y = element_text(size = 16),
axis.title.y = element_text(size = 18)
)
ggplot(X_NArm, aes(Z_lm_K, Z_lm_AUC)) +
geom_point(shape = 3, color = "gray70") +
geom_point(data = InteractionScores_RF2, aes(Z_lm_K, Z_lm_AUC), color = "cyan") +
ggtitle("Interaction K vs. Interaction AUC") +
xlab("z-score K") +
ylab("z-score AUC") +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_5$r.squared, 3))) +
theme_publication_legend_right() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(size = 16),
axis.title.x = element_text(size = 18),
axis.text.y = element_text(size = 16),
axis.title.y = element_text(size = 18)
)
ggplot(X_NArm, aes(Z_lm_r, Z_lm_AUC)) +
geom_point(shape = 3, color = "gray70") +
geom_point(data = InteractionScores_RF2, aes(Z_lm_r, Z_lm_AUC), color = "cyan") +
ggtitle("Interaction r vs. Interaction AUC") +
xlab("z-score r") +
ylab("z-score AUC") +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_6$r.squared, 3))) +
theme_publication_legend_right() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(size = 16),
axis.title.x = element_text(size = 18),
axis.text.y = element_text(size = 16),
axis.title.y = element_text(size = 18)
)
dev.off()
# write.csv(Labels, file = paste("../Code/Parameters.csv"), row.names = FALSE)
timestamp()