Files
hartman-server/workflow/apps/r/interactions.R

3452 lines
168 KiB
R

#!/usr/bin/env Rscript
# NOTE: The script now has 6 optional arguments:
# 1. Path to input easy results file
# 2. /output/ directory
# 3. Path to StudyInfo.csv
# 4. Path to sgd_gene_list
# 5. The experiment number (Exp# directory)
# 6. Standard deviation value
library("ggplot2")
library("plyr")
library("extrafont")
library("gridExtra")
library("gplots")
library("RColorBrewer")
library("stringr")
library("gdata")
library("plotly")
library("htmlwidgets")
# Parse arguments
args <- commandArgs(TRUE)
exp_number <- as.numeric(args[1])
delta_bg_factor <- as.numeric(args[2])
study_info_file <- file.path(args[3])
sgd_gene_list <- file.path(args[4])
input_file <- file.path(args[5])
out_dir <- file.path(args[6])
sprintf("The Standard Deviation value is: %d", delta_bg_factor)
out_dir_qc <- file.path(out_dir, "qc")
if (!dir.exists(out_dir)) {
dir.create(out_dir)
}
if (!dir.exists(out_dir_qc)) {
dir.create(out_dir_qc)
}
options(width = 1000)
ls.str()
# Write delBGFactor to the StudyInfo file
# TODO we probably shouldn't be doing this, need one source of truth
# TODO disabling this for now
# labels <- read.csv(file = study_info_file, stringsAsFactors = FALSE) # sep = ","
# labels[exp_number, 3] <- delta_bg_factor
# write.csv(Labels, file = study_info_file, row.names = FALSE)
# Begin User Data Selection Section
# Read in the data
df <- read.delim(input_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
df <- df[!(df[[1]] %in% c("", "Scan")), ]
# df <- df[!(df[[1]]%in%c(61:76)), ] # remove dAmp plates which are Scans 61 thru 76
# df <- df[which(df$Specifics == "WT"), ]
# df_length <- length(df[1, ])
# df_end <- length(df[1, ]) - 2
# df <- df[, c(1:42, df_end:df_length)]
# print(names(df))
# Use numeric data to perform operations
df$col <- as.numeric(df$Col)
df$row <- as.numeric(df$Row)
df$l <- as.numeric(df$l)
df$k <- as.numeric(df$K)
df$r <- as.numeric(df$r)
df$scan <- as.numeric(df$Scan)
df$auc <- as.numeric(df$AUC)
df$last_bg <- as.numeric(df$LstBackgrd)
df$first_bg <- as.numeric(df$X1stBackgrd)
# print(df)
# Sometimes the non-varying drug is in the 'Drug' col vs the 'Modifier1' col
# as was the case in Gemcitabin and Cytarabin experiments.
# The following allows user to rename columns so as to get the appropriate
# data where it needs to be for the script to run properly.
# colnames(df)[7] <- "Modifier1"
# colnames(df)[8] <- "Conc1"
# colnames(df)[10] <- "Drug"
# colnames(df)[11] <- "Conc"
# Set the OrfRep to YDL227C for the ref data
df[df$ORF == "YDL227C", ]$OrfRep <- "YDL227C"
# Sean removes the Doxycyclin at 0.0ug.mL so that only the Oligomycin series with Doxycyclin of 0.12ug/mL are used.
# That is the first DM plates are removed from the data set with the following.
# This removes data with dox == 0 leaving gene expression on with four different concentrations of Gemcytabin
# df <- df[df$Conc1 != "0ug/ml", ]
df <- df[df$Drug != "BMH21", ] # this removes data concerning BMH21 for this experiment
# Mert placed the "bad_spot" text in the ORF col. for particular spots in the RF1 and RF2 plates.
# This code removes those spots from the data set used for the interaction analysis.
# Dr.Hartman feels that these donot effect Zscores significantly and so "non-curated" files were used.
# try(df <- df[df$ORF != "bad_spot", ])
# Get total number of drug concentrations
total_conc_nums <- length(unique(df$Conc))
# Function to ID numbers in string with characters+numbers (ie to get numeric drug conc)
numextract <- function(string) {
str_extract(string, "\\-*\\d+\\.*\\d*")
}
# Generate a new column with the numeric drug concs
df$conc_num <- as.numeric(numextract(df$Conc))
# Generate new column with the numeric drug concs as factors starting at 0 for the graphing later
df$conc_num_factor <- as.numeric(as.factor(df$conc_num)) - 1
# Get the max factor for concentration
max_conc <- max(df$conc_num_factor)
# If treating numbers not as factors uncomment next line and comment out previous line
# max_conc <- max(df$conc_num)
# Remove wells with problems for making graphs and to not include in summary statistics
df <- df[df$Gene != "BLANK", ]
df <- df[df$Gene != "Blank", ]
df <- df[df$ORF != "Blank", ]
df <- df[df$Gene != "blank", ]
# df <- df[df$Gene != "HO", ]
# Use sgd_gene_list to update orfs and replace empty geneName cells with ORF name (adapted from Sean's Merge script).
# This is to 'fix' the naming for everything that follows (REMc, Heatmaps ... et.al) rather than do it piece meal later
# Sean's Match Script( which was adapted here) was fixed 2022_0608 so as not to overwrite the RF1&RF2 geneNames
# in the z_lm_l, k, r&auc output values. Values correlated well but were off by a multiplier factor.
genes <- data.frame(read.delim(
file = sgd_gene_list, quote = "", header = FALSE, colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11))))
for (i in 1:length(df[, 14])) {
ii <- as.numeric(i)
line_num <- match(df[ii, 14], genes[, 1], nomatch = 1)
orf_rep_col_num <- as.numeric(match("OrfRep", names(df)))
if (df[ii, orf_rep_col_num] != "YDL227C") {
df[ii, 15] <- genes[line_num, 2]
}
if ((df[ii, 15] == "") || (df[ii, 15] == "OCT1")) {
df[ii, 15] <- df[ii, orf_rep_col_num]
}
}
# Remove dAmPs
# jlh confirmed to leave dAmps in so comment out this section
# DAmPs_list <- "../Code/22_0602_Remy_DAmPsList.txt"
# Damps <- read.delim(DAmPs_list, header = F)
# df <- df[!(df$ORF %in% Damps$V1), ] # fix this to Damps[, 1]
# dfafterDampsRM = df # backup for debugging
# Begin Graphics Boiler Plate Section
# theme elements for plots
theme_publication <- function(base_size = 14, base_family = "sans") {
library(grid)
library(ggthemes)
(theme_foundation(base_size = base_size, base_family = base_family) +
theme(plot.title = element_text(
face = "bold",
size = rel(1.2), hjust = 0.5),
text = element_text(),
panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold", size = rel(1)),
axis.title.y = element_text(angle = 90, vjust = 2),
axis.title.x = element_text(vjust = -0.2),
axis.text = element_text(),
axis.line = element_line(colour = "black"),
axis.ticks = element_line(),
panel.grid.major = element_line(colour = "#f0f0f0"),
panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA),
legend.position = "bottom",
legend.direction = "horizontal",
legend.key.size = unit(0.2, "cm"),
legend.spacing = unit(0, "cm"),
legend.title = element_text(face = "italic"),
plot.margin = unit(c(10, 5, 5, 5), "mm"),
strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
strip.text = element_text(face = "bold")
)
)
}
scale_fill_publication <- function(...) {
library(scales)
discrete_scale("fill", "Publication", manual_pal(
values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...)
}
scale_colour_publication <- function(...) {
discrete_scale("colour", "Publication", manual_pal(
values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...)
}
theme_publication_legend_right <- function(base_size = 14, base_family = "sans") {
(theme_foundation(base_size = base_size, base_family = base_family) +
theme(plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
text = element_text(),
panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold", size = rel(1)),
axis.title.y = element_text(angle = 90, vjust = 2),
axis.title.x = element_text(vjust = -0.2),
axis.text = element_text(),
axis.line = element_line(colour = "black"),
axis.ticks = element_line(),
panel.grid.major = element_line(colour = "#f0f0f0"),
panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA),
legend.position = "right",
legend.direction = "vertical",
legend.key.size = unit(0.5, "cm"),
legend.spacing = unit(0, "cm"),
legend.title = element_text(face = "italic"),
plot.margin = unit(c(10, 5, 5, 5), "mm"),
strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
strip.text = element_text(face = "bold")
)
)
}
scale_fill_publication <- function(...) {
discrete_scale("fill", "Publication", manual_pal(
values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...)
}
scale_colour_publication <- function(...) {
discrete_scale("colour", "Publication", manual_pal(
values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...)
}
# Print timestamp for initial time the code starts
timestamp()
# Begin QC Section
# Part 2 - Quality control
# Print quality control graphs for each dataset before removing contaminated data
# and before adjusting missing data to max theoretical values
# Plate analysis plot
# Plate analysis is a quality check to identify plate effects containing anomalies
plate_analysis_l <-
ggplot(df, aes(Scan, l, color = as.factor(conc_num))) +
geom_point(shape = 3, size = 0.2) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(fun = mean, geom = "point", size = 0.6) +
ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_publication()
plate_analysis_k <-
ggplot(df, aes(Scan, k, color = as.factor(conc_num))) +
geom_point(shape = 3, size = 0.2) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(fun = mean, geom = "point", size = 0.6) +
ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_publication()
plate_analysis_r <-
ggplot(df, aes(Scan, r, color = as.factor(conc_num))) +
geom_point(shape = 3, size = 0.2) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(fun = mean, geom = "point", size = 0.6) +
ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_publication()
plate_analysis_auc <-
ggplot(df, aes(Scan, auc, color = as.factor(conc_num))) +
geom_point(shape = 3, size = 0.2) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(fun = mean, geom = "point", size = 0.6) +
ggtitle("Plate analysis by Drug Conc for auc before quality control") + theme_publication()
plate_analysis_l_box <-
ggplot(df, aes(as.factor(Scan), l, color = as.factor(conc_num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for L before quality control") +
theme_publication()
plate_analysis_k_box <-
ggplot(df, aes(as.factor(Scan), k, color = as.factor(conc_num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for K before quality control") +
theme_publication()
plate_analysis_r_box <-
ggplot(df, aes(as.factor(Scan), r, color = as.factor(conc_num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for r before quality control") +
theme_publication()
plate_analysis_auc_box <-
ggplot(df, aes(as.factor(Scan), auc, color = as.factor(conc_num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for auc before quality control") +
theme_publication()
# Quality control - values with a high delta background likely have heavy contamination
# Check the frequency of these values
# Report the L and k values of these spots
# Report the number to be removed based on the delta_background_tolerance
df$delta_bg <- df$last_bg - df$first_bg
# Raw l vs k before QC
raw_l_vs_k_before_qc <-
ggplot(df, aes(l, k, color = as.factor(conc_num))) +
geom_point(aes(ORF = ORF, Gene = Gene, delta_bg = delta_bg), shape = 3) +
ggtitle("Raw L vs K before QC") +
theme_publication_legend_right()
pdf(file.path(out_dir_qc, "raw_l_vs_k_before_qc.pdf"), width = 12, height = 8)
raw_l_vs_k_before_qc
dev.off()
pgg <- ggplotly(raw_l_vs_k_before_qc)
plotly_path <- file.path(out_dir_qc, "raw_l_vs_k_before_qc.html")
saveWidget(pgg, file = plotly_path, selfcontained = TRUE)
# Set delta background tolerance based on 3 sds from the mean delta background
delta_background_tolerance <- mean(df$delta_bg) + (delta_bg_factor * sd(df$delta_bg))
# delta_background_tolerance <- mean(df$delta_bg)+(3*sd(df$delta_bg))
sprintf("delta_background_tolerance is %f", delta_background_tolerance)
plate_analysis_delta_bg <-
ggplot(df, aes(Scan, delta_bg, color = as.factor(conc_num))) +
geom_point(shape = 3, size = 0.2, position = "jitter") +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(fun = mean, geom = "point", size = 0.6) +
ggtitle("Plate analysis by Drug Conc for delta_bg before quality control") +
theme_publication()
plate_analysis_delta_bg_box <-
ggplot(df, aes(as.factor(Scan), delta_bg, color = as.factor(conc_num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for delta_bg before quality control") +
theme_publication()
x_delta_bg_above_tolerance <-
df[df$delta_bg >= delta_background_tolerance, ]
x_delta_bg_above_tolerance_k_halfmedian <-
(median(x_delta_bg_above_tolerance$k, na.rm = TRUE)) / 2
x_delta_bg_above_tolerance_l_halfmedian <-
(median(x_delta_bg_above_tolerance$l, na.rm = TRUE)) / 2
x_delta_bg_above_tolerance_to_remove <-
dim(x_delta_bg_above_tolerance)[1]
x_delta_bg_above_tolerance_l_vs_k <-
ggplot(x_delta_bg_above_tolerance, aes(l, k, color = as.factor(conc_num))) +
geom_point(aes(ORF = ORF, Gene = Gene, delta_bg = delta_bg), shape = 3) +
ggtitle(paste("Raw L vs K for strains above delta background threshold of", delta_background_tolerance, "or above")) +
annotate("text", x = x_delta_bg_above_tolerance_l_halfmedian, y = x_delta_bg_above_tolerance_k_halfmedian,
label = paste("Strains above delta background tolerance = ", x_delta_bg_above_tolerance_to_remove)
) +
theme_publication_legend_right()
pdf(file.path(out_dir_qc, "raw_l_vs_k_for_strains_above_delta_background_threshold.pdf"), width = 12, height = 8)
x_delta_bg_above_tolerance_l_vs_k
dev.off()
pgg <- ggplotly(x_delta_bg_above_tolerance_l_vs_k)
plotly_path <- file.path(out_dir_qc, "raw_l_vs_k_for_strains_above_delta_background_threshold.html")
saveWidget(pgg, file = plotly_path, selfcontained = TRUE)
# Frequency plot for all data vs. the delta_background
delta_bg_frequency_plot <- ggplot(df, aes(delta_bg, color = as.factor(conc_num))) + geom_density() +
ggtitle("Density plot for Delta Background by Conc All Data") + theme_publication_legend_right()
# Bar plot for all data vs. the delta_background
delta_bg_bar_plot <- ggplot(df, aes(delta_bg, color = as.factor(conc_num))) + geom_bar() +
ggtitle("Bar plot for Delta Background by Conc All Data") + theme_publication_legend_right()
pdf(file.path(out_dir_qc, "frequency_delta_background.pdf"), width = 12, height = 8)
print(delta_bg_frequency_plot)
print(delta_bg_bar_plot)
dev.off()
# Need to identify missing data, and differentiate between this data and removed data
# so the removed data can get set to NA and the missing data can get set to max theoretical values
# 1 for missing data, 0 for non missing data
# Use "NG" for NoGrowth rather than "missing"
df$NG <- 0
try(df[df$l == 0 & !is.na(df$l), ]$NG <- 1)
# 1 for removed data, 0 non removed data
# Use DB to identify number of genes removed due to the DeltaBackground Threshold rather than "Removed"
df$DB <- 0
try(df[df$delta_bg >= delta_background_tolerance, ]$DB <- 1)
# Replace the CPPs for l, r, auc and k (must be last!) for removed data
try(df[df$delta_bg >= delta_background_tolerance, ]$l <- NA)
try(df[df$delta_bg >= delta_background_tolerance, ]$r <- NA)
try(df[df$delta_bg >= delta_background_tolerance, ]$auc <- NA)
try(df[df$delta_bg >= delta_background_tolerance, ]$k <- NA)
# QC Plots
plate_analysis_l_after_qc <- ggplot(df, aes(Scan, l, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(
fun = mean,
geom = "point",
size = 0.6
) +
ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_publication()
plate_analysis_k_after_qc <-
ggplot(df, aes(Scan, k, color = as.factor(conc_num))) +
geom_point(shape = 3, size = 0.2) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(
fun = mean,
geom = "point",
size = 0.6
) +
ggtitle("Plate analysis by Drug Conc for k after quality control") + theme_publication()
plate_analysis_r_after_qc <-
ggplot(df, aes(Scan, r, color = as.factor(conc_num))) +
geom_point(shape = 3, size = 0.2) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(
fun = mean,
geom = "point",
size = 0.6
) +
ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_publication()
plate_analysis_auc_after_qc <-
ggplot(df, aes(Scan, auc, color = as.factor(conc_num))) +
geom_point(shape = 3, size = 0.2) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(
fun = mean,
geom = "point",
size = 0.6
) +
ggtitle("Plate analysis by Drug Conc for auc after quality control") + theme_publication()
plate_analysis_delta_bg_after_qc <-
ggplot(df, aes(Scan, delta_bg, color = as.factor(conc_num))) +
geom_point(shape = 3, size = 0.2) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(
fun = mean,
geom = "point",
size = 0.6
) +
ggtitle("Plate analysis by Drug Conc for delta_bg after quality control") +
theme_publication()
plate_analysis_l_box_after_qc <-
ggplot(df, aes(as.factor(Scan), l, color = as.factor(conc_num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for L after quality control") +
theme_publication()
plate_analysis_k_box_after_qc <-
ggplot(df, aes(as.factor(Scan), k, color = as.factor(conc_num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for K after quality control") +
theme_publication()
plate_analysis_r_box_after_qc <-
ggplot(df, aes(as.factor(Scan), r, color = as.factor(conc_num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for r after quality control") +
theme_publication()
plate_analysis_auc_box_after_qc <-
ggplot(df, aes(as.factor(Scan), auc, color = as.factor(conc_num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for auc after quality control") +
theme_publication()
plate_analysis_delta_bg_box_after_qc <-
ggplot(df, aes(as.factor(Scan), delta_bg, color = as.factor(conc_num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for delta_bg after quality control") +
theme_publication()
# Print the plate analysis data before and after QC
pdf(file.path(out_dir_qc, "plate_analysis.pdf"), width = 14, height = 9)
plate_analysis_l
plate_analysis_l_after_qc
plate_analysis_k
plate_analysis_k_after_qc
plate_analysis_r
plate_analysis_r_after_qc
plate_analysis_auc
plate_analysis_auc_after_qc
plate_analysis_delta_bg
plate_analysis_delta_bg_after_qc
dev.off()
# Print the plate analysis data before and after QC
pdf(file.path(out_dir_qc, "plate_analysis_boxplots.pdf"), width = 18, height = 9)
plate_analysis_l_box
plate_analysis_l_box_after_qc
plate_analysis_k_box
plate_analysis_k_box_after_qc
plate_analysis_r_box
plate_analysis_r_box_after_qc
plate_analysis_auc_box
plate_analysis_auc_box_after_qc
plate_analysis_delta_bg_box
plate_analysis_delta_bg_box_after_qc
dev.off()
# Remove the zero values and print plate analysis
x_no_zero <- df[which(df$l > 0), ]
plate_analysis_l_after_qc_z <-
ggplot(x_no_zero, aes(Scan, l, color = as.factor(conc_num))) +
geom_point(shape = 3, size = 0.2) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(
fun = mean,
geom = "point",
size = 0.6
) +
ggtitle("Plate analysis by Drug Conc for L after quality control") +
theme_publication()
plate_analysis_k_after_qc_z <-
ggplot(x_no_zero, aes(Scan, k, color = as.factor(conc_num))) +
geom_point(shape = 3, size = 0.2) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(
fun = mean,
geom = "point",
size = 0.6
) +
ggtitle("Plate analysis by Drug Conc for K after quality control") +
theme_publication()
plate_analysis_r_after_qc_z <-
ggplot(x_no_zero, aes(Scan, r, color = as.factor(conc_num))) +
geom_point(shape = 3, size = 0.2) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(
fun = mean,
geom = "point",
size = 0.6
) +
ggtitle("Plate analysis by Drug Conc for r after quality control") +
theme_publication()
plate_analysis_auc_after_qc_z <-
ggplot(x_no_zero, aes(Scan, auc, color = as.factor(conc_num))) +
geom_point(shape = 3, size = 0.2) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(
fun = mean,
geom = "point",
size = 0.6
) +
ggtitle("Plate analysis by Drug Conc for auc after quality control") +
theme_publication()
plate_analysis_delta_bg_after_qc_z <-
ggplot(x_no_zero, aes(Scan, delta_bg, color = as.factor(conc_num))) +
geom_point(shape = 3, size = 0.2) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar"
) +
stat_summary(
fun = mean,
geom = "point",
size = 0.6
) +
ggtitle("Plate analysis by Drug Conc for delta_bg after quality control") +
theme_publication()
plate_analysis_l_box_after_qc_z <-
ggplot(x_no_zero, aes(as.factor(Scan), l, color = as.factor(conc_num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for L after quality control") +
theme_publication()
plate_analysis_k_box_after_qc_z <-
ggplot(x_no_zero, aes(as.factor(Scan), k, color = as.factor(conc_num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for K after quality control") +
theme_publication()
plate_analysis_r_box_after_qc_z <-
ggplot(x_no_zero, aes(as.factor(Scan), r, color = as.factor(conc_num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for r after quality control") +
theme_publication()
plate_analysis_auc_box_after_qc_z <-
ggplot(x_no_zero, aes(as.factor(Scan), auc, color = as.factor(conc_num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for auc after quality control") +
theme_publication()
plate_analysis_delta_bg_box_after_qc_z <-
ggplot(x_no_zero, aes(as.factor(Scan), delta_bg, color = as.factor(conc_num))) +
geom_boxplot() +
ggtitle("Plate analysis by Drug Conc for delta_bg after quality control") +
theme_publication()
# Print the plate analysis data before and after QC
pdf(file.path(out_dir_qc, "plate_analysis_no_zeros.pdf"), width = 14, height = 9)
plate_analysis_l_after_qc_z
plate_analysis_k_after_qc_z
plate_analysis_r_after_qc_z
plate_analysis_auc_after_qc_z
plate_analysis_delta_bg_after_qc_z
dev.off()
# Print the plate analysis data before and after QC
pdf(file.path(out_dir_qc, "plate_analysis_no_zeros_boxplots.pdf"), width = 18, height = 9)
plate_analysis_l_box_after_qc_z
plate_analysis_k_box_after_qc_z
plate_analysis_r_box_after_qc_z
plate_analysis_auc_box_after_qc_z
plate_analysis_delta_bg_box_after_qc_z
dev.off()
# Remove dataset with zeros removed
rm(x_no_zero)
# df_test_missing_and_removed <- df[df$Removed == 1, ]
# Calculate summary statistics for all strains, including both background and the deletions
x_stats_all <- ddply(
df,
c("conc_num", "conc_num_factor"),
summarise,
N = (length(l)),
mean_l = mean(l, na.rm = TRUE),
median_l = median(l, na.rm = TRUE),
max_l = max(l, na.rm = TRUE),
min_l = min(l, na.rm = TRUE),
sd_l = sd(l, na.rm = TRUE),
se_l = sd_l / sqrt(N - 1),
mean_k = mean(k, na.rm = TRUE),
median_k = median(k, na.rm = TRUE),
max_k = max(k, na.rm = TRUE),
min_k = min(k, na.rm = TRUE),
sd_k = sd(k, na.rm = TRUE),
se_k = sd_k / sqrt(N - 1),
mean_r = mean(r, na.rm = TRUE),
median_r = median(r, na.rm = TRUE),
max_r = max(r, na.rm = TRUE),
min_r = min(r, na.rm = TRUE),
sd_r = sd(r, na.rm = TRUE),
se_r = sd_r / sqrt(N - 1),
mean_auc = mean(auc, na.rm = TRUE),
median_auc = median(auc, na.rm = TRUE),
max_auc = max(auc, na.rm = TRUE),
min_auc = min(auc, na.rm = TRUE),
sd_auc = sd(auc, na.rm = TRUE),
se_auc = sd_auc / sqrt(N - 1)
)
# print(x_stats_all_l)
write.csv(x_stats_all, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
# Part 3 - Generate summary statistics and calculate the max theoretical L value
# Calculate the Z score at each drug conc for each deletion strain
# Get the background strains - can be modified to take another argument but for most screens will just be YDL227C
background_strains <- c("YDL227C")
# First part of loop will go through for each background strain
# In most cases there will only be one YDL227C
for (s in background_strains) {
x_background <- df[df$OrfRep == s, ]
# If there's missing data for the background strains set these values to NA so the 0 values aren't included in summary statistics
# we may want to consider in some cases giving the max high value to L depending on the data type
if (table(x_background$l)[1] == 0) {
x_background[x_background$l == 0, ]$l <- NA
x_background[x_background$k == 0, ]$k <- NA
x_background[x_background$r == 0, ]$r <- NA
x_background[x_background$auc == 0, ]$auc <- NA
}
x_background <- x_background[!is.na(x_background$l), ]
# Get summary stats for L, k, R, auc
x_stats_by_l <- ddply(
x_background,
c("OrfRep", "conc_num", "conc_num_factor"),
summarise,
N = (length(l)),
mean = mean(l, na.rm = TRUE),
median = median(l, na.rm = TRUE),
max = max(l, na.rm = TRUE),
min = min(l, na.rm = TRUE),
sd = sd(l, na.rm = TRUE),
se = sd / sqrt(N - 1)
)
print(x_stats_by_l)
x1_sd <- max(x_stats_by_l$sd)
x_stats_by_k <- ddply(
x_background,
c("OrfRep", "conc_num", "conc_num_factor"),
summarise,
N = (length(k)),
mean = mean(k, na.rm = TRUE),
median = median(k, na.rm = TRUE),
max = max(k, na.rm = TRUE),
min = min(k, na.rm = TRUE),
sd = sd(k, na.rm = TRUE),
se = sd / sqrt(N - 1)
)
x1_sd_k <- max(x_stats_by_k$sd)
x_stats_by_r <- ddply(
x_background,
c("OrfRep", "conc_num", "conc_num_factor"),
summarise,
N = length(r),
mean = mean(r, na.rm = TRUE),
median = median(r, na.rm = TRUE),
max = max(r, na.rm = TRUE),
min = min(r, na.rm = TRUE),
sd = sd(r, na.rm = TRUE),
se = sd / sqrt(N - 1)
)
x1_sd_r <- max(x_stats_by_r$sd)
x_stats_by_auc <- ddply(
x_background,
c("OrfRep", "conc_num", "conc_num_factor"),
summarise,
N = length(auc),
mean = mean(auc, na.rm = TRUE),
median = median(auc, na.rm = TRUE),
max = max(auc, na.rm = TRUE),
min = min(auc, na.rm = TRUE),
sd = sd(auc, na.rm = TRUE),
se = sd / sqrt(N - 1)
)
x1_sd_auc <- max(x_stats_by_auc$sd)
x_stats_by <- ddply(
x_background,
c("OrfRep", "conc_num", "conc_num_factor"),
summarise,
N = (length(l)),
mean_l = mean(l, na.rm = TRUE),
median_l = median(l, na.rm = TRUE),
max_l = max(l, na.rm = TRUE),
min_l = min(l, na.rm = TRUE),
sd_l = sd(l, na.rm = TRUE),
se_l = sd_l / sqrt(N - 1),
mean_k = mean(k, na.rm = TRUE),
median_k = median(k, na.rm = TRUE),
max_k = max(k, na.rm = TRUE),
min_k = min(k, na.rm = TRUE),
sd_k = sd(k, na.rm = TRUE),
se_k = sd_k / sqrt(N - 1),
mean_r = mean(r, na.rm = TRUE),
median_r = median(r, na.rm = TRUE),
max_r = max(r, na.rm = TRUE),
min_r = min(r, na.rm = TRUE),
sd_r = sd(r, na.rm = TRUE),
se_r = sd_r / sqrt(N - 1),
mean_auc = mean(auc, na.rm = TRUE),
median_auc = median(auc, na.rm = TRUE),
max_auc = max(auc, na.rm = TRUE),
min_l = min(auc, na.rm = TRUE),
sd_auc = sd(auc, na.rm = TRUE),
se_auc = sd_auc / sqrt(N - 1)
)
write.csv(x_stats_by, file.path(out_dir, "summary_stats_background_strains.csv"), row.names = FALSE)
# Calculate the max theoretical L values
# Only look for max values when k is within 2sd of the ref strain
for (q in unique(df$conc_num_factor)) {
if (q == 0) {
x_within_2sd_k <-
df[df$conc_num_factor == q, ]
x_within_2sd_k <-
x_within_2sd_k[!is.na(x_within_2sd_k$l), ]
x_stats_temp_k <-
x_stats_by_k[x_stats_by_k$conc_num_factor == q, ]
x_within_2sd_k <-
x_within_2sd_k[x_within_2sd_k$k >= (x_stats_temp_k$mean[1] - (2 * x_stats_temp_k$sd[1])), ]
x_within_2sd_k <-
x_within_2sd_k[x_within_2sd_k$k <= (x_stats_temp_k$mean[1] + (2 * x_stats_temp_k$sd[1])), ]
x_outside_2sd_k <-
df[df$conc_num_factor == q, ]
x_outside_2sd_k <-
x_outside_2sd_k[!is.na(x_outside_2sd_k$l), ]
# x_outside_2sd_k_temp <-
# x_stats_by_k[x_stats_by_k$conc_num_factor == q, ]
x_outside_2sd_k <-
x_outside_2sd_k[
x_outside_2sd_k$k <= (x_stats_temp_k$mean[1] - (2 * x_stats_temp_k$sd[1])) |
x_outside_2sd_k$k >= (x_stats_temp_k$mean[1] + (2 * x_stats_temp_k$sd[1])), ]
# x_outside_2sd_k <-
# x_outside_2sd_k[x_outside_2sd_k$k >= (x_stats_temp_k$mean[1] + (2*x_stats_temp_k$sd[1])), ]
}
if (q > 0) {
x_within_2sd_k_temp <-
df[df$conc_num_factor == q, ]
x_within_2sd_k_temp <-
x_within_2sd_k_temp[!is.na(x_within_2sd_k_temp$l), ]
x_stats_temp_k <-
x_stats_by_k[x_stats_by_k$conc_num_factor == q, ]
x_within_2sd_k_temp <-
x_within_2sd_k_temp[x_within_2sd_k_temp$k >= (x_stats_temp_k$mean[1] - (2 * x_stats_temp_k$sd[1])), ]
x_within_2sd_k_temp <-
x_within_2sd_k_temp[x_within_2sd_k_temp$k <= (x_stats_temp_k$mean[1] + (2 * x_stats_temp_k$sd[1])), ]
x_within_2sd_k <-
rbind(x_within_2sd_k, x_within_2sd_k_temp)
x_outside_2sd_k_temp <-
df[df$conc_num_factor == q, ]
x_outside_2sd_k_temp <-
x_outside_2sd_k_temp[!is.na(x_outside_2sd_k_temp$l), ]
# x_outside_2sd_k_temp <-
# x_stats_by_k[x_stats_by_k$conc_num_factor == q, ]
x_outside_2sd_k_temp <-
x_outside_2sd_k_temp[
x_outside_2sd_k_temp$k <= (x_stats_temp_k$mean[1] - (2 * x_stats_temp_k$sd[1])) |
x_outside_2sd_k_temp$k >= (x_stats_temp_k$mean[1] + (2 * x_stats_temp_k$sd[1])), ]
# x_outside_2sd_k_temp <-
# x_outside_2sd_k_temp[x_outside_2sd_k_temp$k >= (x_stats_temp_k$mean[1] + (2*x_stats_temp_k$sd[1])) , ]
x_outside_2sd_k <-
rbind(x_outside_2sd_k, x_outside_2sd_k_temp)
}
}
x_stats_by_l_within_2sd_k <- ddply(
x_within_2sd_k,
c("conc_num", "conc_num_factor"),
summarise,
N = (length(l)),
mean = mean(l),
median = median(l),
max = max(l, na.rm = TRUE),
min = min(l, na.rm = TRUE),
sd = sd(l),
se = sd / sqrt(N - 1),
z_max = (max - mean) / sd
)
print(x_stats_by_l_within_2sd_k)
x1_sd_within_2sd_k <- max(x_stats_by_l_within_2sd_k$sd)
write.csv(
x_stats_by_l_within_2sd_k,
file.path(out_dir_qc, "max_observed_l_vals_for_spots_within_2sd_k.csv"),
row.names = FALSE
)
x_stats_by_l_outside_2sd_k <- ddply(
x_outside_2sd_k,
c("conc_num", "conc_num_factor"),
summarise,
N = (length(l)),
mean = mean(l),
median = median(l),
max = max(l, na.rm = TRUE),
min = min(l, na.rm = TRUE),
sd = sd(l),
se = sd / sqrt(N - 1)
)
print(x_stats_by_l_outside_2sd_k)
x1_sd_outside_2sd_k <- max(x_stats_by_l_outside_2sd_k$sd)
# x1_sd_outside_2sd_k <- df[df$l %in% x1_sd_within_2sd_k$l, ]
outside_2sd_k_l_vs_k <-
ggplot(x_outside_2sd_k, aes(l, k, color = as.factor(conc_num))) +
geom_point(aes(ORF = ORF, Gene = Gene, delta_bg = delta_bg), shape = 3) +
ggtitle("Raw L vs K for strains falling outside 2sd of the K mean at each conc") +
theme_publication_legend_right()
pdf(file.path(out_dir_qc, "raw_l_vs_k_for_strains_2sd_outside_mean_k.pdf"), width = 10, height = 8)
print(outside_2sd_k_l_vs_k)
dev.off()
pgg <- ggplotly(outside_2sd_k_l_vs_k)
plotly_path <- file.path(out_dir_qc, "raw_l_vs_k_for_strains_outside_2sd_k.html")
saveWidget(pgg, file = plotly_path, selfcontained = TRUE)
outside_2sd_k_delta_background_vs_k <-
ggplot(x_outside_2sd_k, aes(delta_bg, k, color = as.factor(conc_num))) +
geom_point(aes(l = l, ORF = ORF, Gene = Gene), shape = 3, position = "jitter") +
ggtitle("DeltaBackground vs K for strains falling outside 2sd of the K mean at each conc") +
theme_publication_legend_right()
pdf(file.path(out_dir_qc, "delta_background_vs_k_for_strains_2sd_outside_mean_k.pdf"), width = 10, height = 8)
outside_2sd_k_delta_background_vs_k
dev.off()
pgg <- ggplotly(outside_2sd_k_delta_background_vs_k)
# pgg
plotly_path <- file.path(out_dir_qc, "delta_background_vs_k_for_strains_outside_2sd_k.html")
saveWidget(pgg, file = plotly_path, selfcontained = TRUE)
# Get the background strain mean values at the no drug conc to calculate shift
background_l <- x_stats_by_l$mean[1]
background_k <- x_stats_by_k$mean[1]
background_r <- x_stats_by_r$mean[1]
background_auc <- x_stats_by_auc$mean[1]
# Create empty plots for plotting element
p_l <- ggplot()
p_k <- ggplot()
p_r <- ggplot()
p_auc <- ggplot()
p_rf_l <- ggplot()
p_rf_k <- ggplot()
p_rf_r <- ggplot()
p_rf_auc <- ggplot()
# Get only the deletion strains
df2 <- df
df2 <- df2[df2$OrfRep != "YDL227C", ]
# If set to max theoretical value, add a 1 to SM, if not, leave as 0
# SM = Set to Max
df2$SM <- 0
# Set the missing values to the highest theoretical value at each drug conc for L
# Leave other values as 0 for the max/min
for (i in 1:length(unique(df2$conc_num))) {
concentration <- unique(df2$conc_num)[i]
df2_temp <- df2[df2$conc_num == concentration, ]
if (concentration == 0) {
df2_new <- df2_temp
sprintf("Check loop order, conc = %f", concentration)
}
if (concentration > 0) {
try(df2_temp[df2_temp$l == 0 & !is.na(df2_temp$l), ]$l <- x_stats_by_l_within_2sd_k$max[i])
try(df2_temp[df2_temp$l >= x_stats_by_l_within_2sd_k$max[i] & !is.na(df2_temp$l), ]$SM <- 1)
try(df2_temp[df2_temp$l >= x_stats_by_l_within_2sd_k$max[i] & !is.na(df2_temp$l), ]$l <- x_stats_by_l_within_2sd_k$max[i])
# df2_temp[df2_temp$k == 0, ]$k <- x_stats_all_k$max[i]
# df2_temp[df2_temp$r == 0, ]$r <- x_stats_all_r$max[i]
# df2_temp[df2_temp$auc == 0, ]$auc <- x_stats_all_auc$max[i]
sprintf("Check loop order, conc = %f", concentration)
df2_new <- rbind(df2_new, df2_temp)
}
}
df2 <- df2_new
# Get only the RF strains
df2_rf <- df
df2_rf <- df2_rf[df2_rf$OrfRep == "YDL227C", ]
# If set to max theoretical value, add a 1 to SM, if not, leave as 0
# SM = Set to Max
df2_rf$SM <- 0
# Set the missing values to the highest theoretical value at each drug conc for L
# Leave other values as 0 for the max/min
for (i in 1:length(unique(df2_rf$conc_num))) {
concentration <- unique(df2_rf$conc_num)[i]
df2_rf_temp <- df2_rf[df2_rf$conc_num == concentration, ]
if (concentration == 0) {
df2_rf_new <- df2_rf_temp
sprintf("Check loop order, conc = %f", concentration)
}
if (concentration > 0) {
try(df2_rf_temp[df2_rf_temp$l == 0 & !is.na(df2_rf_temp$l), ]$l <- x_stats_by_l_within_2sd_k$max[i])
try(df2_temp[df2_temp$l >= x_stats_by_l_within_2sd_k$max[i] & !is.na(df2_temp$l), ]$SM <- 1)
try(df2_rf_temp[df2_rf_temp$l >= x_stats_by_l_within_2sd_k$max[i] & !is.na(df2_rf_temp$l), ]$l <- x_stats_by_l_within_2sd_k$max[i])
sprintf("If error, refs have no L values outside theoretical max L, for REFs, conc = %f", concentration)
df2_rf_new <- rbind(df2_rf_new, df2_rf_temp)
}
}
df2_rf <- df2_rf_new
# Part 4 Get the RF Z score values
# Change the OrfRep Column to include the RF strain, the Gene name and the Num. so each RF gets its own score
df2_rf$OrfRep <- paste(df2_rf$OrfRep, df2_rf$Gene, df2_rf$Num., sep = "_")
num_genes_rf <- length(unique(df2_rf$OrfRep))
# print(num_genes_rf)
# Create the output data.frame containing columns for each RF strain
interaction_scores_rf <- unique(df2_rf["OrfRep"])
# interaction_scores_rf$Gene <- unique(df2$Gene)
interaction_scores_rf$Gene <- NA
interaction_scores_rf$Raw_Shift_l <- NA
interaction_scores_rf$z_shift_l <- NA
interaction_scores_rf$lm_Score_l <- NA
interaction_scores_rf$z_lm_l <- NA
interaction_scores_rf$R_Squared_l <- NA
interaction_scores_rf$Sum_z_Score_l <- NA
interaction_scores_rf$avg_zscore_l <- NA
interaction_scores_rf$Raw_Shift_k <- NA
interaction_scores_rf$z_shift_k <- NA
interaction_scores_rf$lm_Score_k <- NA
interaction_scores_rf$z_lm_k <- NA
interaction_scores_rf$R_Squared_k <- NA
interaction_scores_rf$Sum_z_Score_k <- NA
interaction_scores_rf$avg_zscore_k <- NA
interaction_scores_rf$Raw_Shift_r <- NA
interaction_scores_rf$z_shift_r <- NA
interaction_scores_rf$lm_Score_r <- NA
interaction_scores_rf$z_lm_r <- NA
interaction_scores_rf$R_Squared_r <- NA
interaction_scores_rf$Sum_z_Score_r <- NA
interaction_scores_rf$avg_zscore_r <- NA
interaction_scores_rf$Raw_Shift_auc <- NA
interaction_scores_rf$z_shift_auc <- NA
interaction_scores_rf$lm_Score_auc <- NA
interaction_scores_rf$z_lm_auc <- NA
interaction_scores_rf$R_Squared_auc <- NA
interaction_scores_rf$Sum_z_Score_auc <- NA
interaction_scores_rf$avg_zscore_auc <- NA
interaction_scores_rf$NG <- NA
interaction_scores_rf$SM <- NA
for (i in 1:num_genes_rf) {
# Get each deletion strain ORF
gene_sel <- unique(df2_rf$OrfRep)[i]
# Extract only the current deletion strain and its data
x_gene_sel <- df2_rf[df2_rf$OrfRep == gene_sel, ]
x_stats_interaction <- ddply(
x_gene_sel,
c("OrfRep", "Gene", "conc_num", "conc_num_factor"),
summarise,
N = (length(l)),
mean_l = mean(l, na.rm = TRUE),
median_l = median(l, na.rm = TRUE),
sd_l = sd(l, na.rm = TRUE),
se_l = sd_l / sqrt(N - 1),
mean_k = mean(k, na.rm = TRUE),
median_k = median(k, na.rm = TRUE),
sd_k = sd(k, na.rm = TRUE),
se_k = sd_k / sqrt(N - 1),
mean_r = mean(r, na.rm = TRUE),
median_r = median(r, na.rm = TRUE),
sd_r = sd(r, na.rm = TRUE),
se_r = sd_r / sqrt(N - 1),
mean_auc = mean(auc, na.rm = TRUE),
median_auc = median(auc, na.rm = TRUE),
sd_auc = sd(auc, na.rm = TRUE),
se_auc = sd_auc / sqrt(N - 1),
NG = sum(NG, na.rm = TRUE),
DB = sum(DB, na.rm = TRUE),
SM = sum(SM, na.rm = TRUE)
)
# Get shift vals
# if L is 0, that means the no growth on no drug
# if L is NA at 0, that means the spot was removed due to contamination
# if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift
# otherwise calculate shift at no drug conc
if (is.na(x_stats_interaction$mean_l[1]) || x_stats_interaction$mean_l[1] == 0) {
x_stats_interaction$Raw_Shift_l <- 0
x_stats_interaction$Raw_Shift_k <- 0
x_stats_interaction$Raw_Shift_r <- 0
x_stats_interaction$Raw_Shift_auc <- 0
x_stats_interaction$z_shift_l <- 0
x_stats_interaction$z_shift_k <- 0
x_stats_interaction$z_shift_r <- 0
x_stats_interaction$z_shift_auc <- 0
} else {
x_stats_interaction$Raw_Shift_l <- x_stats_interaction$mean_l[1] - background_l
x_stats_interaction$Raw_Shift_k <- x_stats_interaction$mean_k[1] - background_k
x_stats_interaction$Raw_Shift_r <- x_stats_interaction$mean_r[1] - background_r
x_stats_interaction$Raw_Shift_auc <- x_stats_interaction$mean_auc[1] - background_auc
x_stats_interaction$z_shift_l <- x_stats_interaction$Raw_Shift_l[1] / x_stats_by_l$sd[1]
x_stats_interaction$z_shift_k <- x_stats_interaction$Raw_Shift_k[1] / x_stats_by_k$sd[1]
x_stats_interaction$z_shift_r <- x_stats_interaction$Raw_Shift_r[1] / x_stats_by_r$sd[1]
x_stats_interaction$z_shift_auc <- x_stats_interaction$Raw_Shift_auc[1] / x_stats_by_auc$sd[1]
}
# Get WT vals
x_stats_interaction$WT_l <- x_stats_by_l$mean
x_stats_interaction$WT_k <- x_stats_by_k$mean
x_stats_interaction$WT_r <- x_stats_by_r$mean
x_stats_interaction$WT_auc <- x_stats_by_auc$mean
# Get WT SD
x_stats_interaction$WT_sd_l <- x_stats_by_l$sd
x_stats_interaction$WT_sd_k <- x_stats_by_k$sd
x_stats_interaction$WT_sd_r <- x_stats_by_r$sd
x_stats_interaction$WT_sd_auc <- x_stats_by_auc$sd
# Only get scores if there's growth at no drug
if (x_stats_interaction$mean_l[1] != 0 && !is.na(x_stats_interaction$mean_l[1])) {
# Calculate expected values
x_stats_interaction$Exp_l <- x_stats_interaction$WT_l + x_stats_interaction$Raw_Shift_l
x_stats_interaction$Exp_k <- x_stats_interaction$WT_k + x_stats_interaction$Raw_Shift_k
x_stats_interaction$Exp_r <- x_stats_interaction$WT_r + x_stats_interaction$Raw_Shift_r
x_stats_interaction$Exp_auc <- x_stats_interaction$WT_auc + x_stats_interaction$Raw_Shift_auc
# Calculate normalized delta values
x_stats_interaction$delta_l <- x_stats_interaction$mean_l - x_stats_interaction$Exp_l
x_stats_interaction$delta_k <- x_stats_interaction$mean_k - x_stats_interaction$Exp_k
x_stats_interaction$delta_r <- x_stats_interaction$mean_r - x_stats_interaction$Exp_r
x_stats_interaction$delta_auc <- x_stats_interaction$mean_auc - x_stats_interaction$Exp_auc
# Disregard shift for no growth values in Z score calculation
if (sum(x_stats_interaction$NG, na.rm = TRUE) > 0) {
x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_l <-
x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_l
x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_k <-
x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_k
x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_r <-
x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_r
x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_auc <-
x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_auc
}
# Disregard shift for set to max values in Z score calculation
if (sum(x_stats_interaction$SM, na.rm = TRUE) > 0) {
x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_l <-
x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_l
# Only calculate the L value without shift since L is the only adjusted value
# x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_k <-
# x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_k
# x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_r <-
# x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_r
# x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_auc <-
# x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_AU
}
# Calculate Z score at each concentration
x_stats_interaction$zscore_l <- (x_stats_interaction$delta_l) / (x_stats_interaction$WT_sd_l)
x_stats_interaction$zscore_k <- (x_stats_interaction$delta_k) / (x_stats_interaction$WT_sd_k)
x_stats_interaction$zscore_r <- (x_stats_interaction$delta_r) / (x_stats_interaction$WT_sd_r)
x_stats_interaction$zscore_auc <- (x_stats_interaction$delta_auc) / (x_stats_interaction$WT_sd_auc)
# Get linear model
gene_lm_l <- lm(formula = delta_l ~ conc_num_factor, data = x_stats_interaction)
gene_lm_k <- lm(formula = delta_k ~ conc_num_factor, data = x_stats_interaction)
gene_lm_r <- lm(formula = delta_r ~ conc_num_factor, data = x_stats_interaction)
gene_lm_auc <- lm(formula = delta_auc ~ conc_num_factor, data = x_stats_interaction)
# Get interaction score calculated by linear model and R-squared value for the fit
gene_interaction_l <- max_conc * (gene_lm_l$coefficients[2]) + gene_lm_l$coefficients[1]
r_squared_l <- summary(gene_lm_l)$r.squared
gene_interaction_k <- max_conc * (gene_lm_k$coefficients[2]) + gene_lm_k$coefficients[1]
r_squared_k <- summary(gene_lm_k)$r.squared
gene_interaction_r <- max_conc * (gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1]
r_squared_r <- summary(gene_lm_r)$r.squared
gene_interaction_auc <- max_conc * (gene_lm_r$coefficients[2]) + gene_lm_auc$coefficients[1]
r_squared_auc <- summary(gene_lm_auc)$r.squared
# Get total of non removed values
num_non_removed_conc <- total_conc_nums - sum(x_stats_interaction$DB, na.rm = TRUE) - 1
# Report the scores
interaction_scores_rf$OrfRep[interaction_scores_rf$OrfRep == gene_sel] <-
x_gene_sel$OrfRep[1]
interaction_scores_rf$Gene[interaction_scores_rf$OrfRep == gene_sel] <-
x_gene_sel$Gene[1]
interaction_scores_rf$Raw_Shift_l[interaction_scores_rf$OrfRep == gene_sel] <-
x_stats_interaction$Raw_Shift_l[1]
interaction_scores_rf$z_shift_l[interaction_scores_rf$OrfRep == gene_sel] <-
x_stats_interaction$z_shift_l[1]
interaction_scores_rf$lm_Score_l[interaction_scores_rf$OrfRep == gene_sel] <-
gene_interaction_l
interaction_scores_rf$R_Squared_l[interaction_scores_rf$OrfRep == gene_sel] <-
r_squared_l
interaction_scores_rf$Sum_z_Score_l[interaction_scores_rf$OrfRep == gene_sel] <-
sum(x_stats_interaction$zscore_l, na.rm = TRUE)
interaction_scores_rf$avg_zscore_l[interaction_scores_rf$OrfRep == gene_sel] <-
sum(x_stats_interaction$zscore_l, na.rm = TRUE) / (num_non_removed_conc)
interaction_scores_rf$Raw_Shift_k[interaction_scores_rf$OrfRep == gene_sel] <-
x_stats_interaction$Raw_Shift_k[1]
interaction_scores_rf$z_shift_k[interaction_scores_rf$OrfRep == gene_sel] <-
x_stats_interaction$z_shift_k[1]
interaction_scores_rf$lm_Score_k[interaction_scores_rf$OrfRep == gene_sel] <-
gene_interaction_k
interaction_scores_rf$R_Squared_k[interaction_scores_rf$OrfRep == gene_sel] <-
r_squared_k
interaction_scores_rf$Sum_z_Score_k[interaction_scores_rf$OrfRep == gene_sel] <-
sum(x_stats_interaction$zscore_k, na.rm = TRUE)
interaction_scores_rf$avg_zscore_k[interaction_scores_rf$OrfRep == gene_sel] <-
sum(x_stats_interaction$zscore_k, na.rm = TRUE) / (num_non_removed_conc)
interaction_scores_rf$Raw_Shift_r[interaction_scores_rf$OrfRep == gene_sel] <-
x_stats_interaction$Raw_Shift_r[1]
interaction_scores_rf$z_shift_r[interaction_scores_rf$OrfRep == gene_sel] <-
x_stats_interaction$z_shift_r[1]
interaction_scores_rf$lm_Score_r[interaction_scores_rf$OrfRep == gene_sel] <-
gene_interaction_r
interaction_scores_rf$R_Squared_r[interaction_scores_rf$OrfRep == gene_sel] <-
r_squared_r
interaction_scores_rf$Sum_z_Score_r[interaction_scores_rf$OrfRep == gene_sel] <-
sum(x_stats_interaction$zscore_r, na.rm = TRUE)
interaction_scores_rf$avg_zscore_r[interaction_scores_rf$OrfRep == gene_sel] <-
sum(x_stats_interaction$zscore_r, na.rm = TRUE) / (total_conc_nums - 1)
interaction_scores_rf$Raw_Shift_auc[interaction_scores_rf$OrfRep == gene_sel] <-
x_stats_interaction$Raw_Shift_auc[1]
interaction_scores_rf$z_shift_auc[interaction_scores_rf$OrfRep == gene_sel] <-
x_stats_interaction$z_shift_auc[1]
interaction_scores_rf$lm_Score_auc[interaction_scores_rf$OrfRep == gene_sel] <-
gene_interaction_auc
interaction_scores_rf$R_Squared_auc[interaction_scores_rf$OrfRep == gene_sel] <-
r_squared_auc
interaction_scores_rf$Sum_z_Score_auc[interaction_scores_rf$OrfRep == gene_sel] <-
sum(x_stats_interaction$zscore_auc, na.rm = TRUE)
interaction_scores_rf$avg_zscore_auc[interaction_scores_rf$OrfRep == gene_sel] <-
sum(x_stats_interaction$zscore_auc, na.rm = TRUE) / (total_conc_nums - 1)
}
if (x_stats_interaction$mean_l[1] == 0 || is.na(x_stats_interaction$mean_l[1])) {
# Calculate expected values
x_stats_interaction$Exp_l <- x_stats_interaction$WT_l + x_stats_interaction$Raw_Shift_l
x_stats_interaction$Exp_k <- x_stats_interaction$WT_k + x_stats_interaction$Raw_Shift_k
x_stats_interaction$Exp_r <- x_stats_interaction$WT_r + x_stats_interaction$Raw_Shift_r
x_stats_interaction$Exp_auc <- x_stats_interaction$WT_auc + x_stats_interaction$Raw_Shift_auc
# Calculate normalized delta values
x_stats_interaction$delta_l <- x_stats_interaction$mean_l - x_stats_interaction$Exp_l
x_stats_interaction$delta_k <- x_stats_interaction$mean_k - x_stats_interaction$Exp_k
x_stats_interaction$delta_r <- x_stats_interaction$mean_r - x_stats_interaction$Exp_r
x_stats_interaction$delta_auc <- x_stats_interaction$mean_auc - x_stats_interaction$Exp_auc
# Disregard shift for missing values in Z score calculation
if (sum(x_stats_interaction$NG, na.rm = TRUE) > 0) {
x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_l <-
x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_l
x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_k <-
x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_k
x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_r <-
x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_r
x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_auc <-
x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_auc
}
# Disregard shift for set to max values in Z score calculation
if (sum(x_stats_interaction$SM, na.rm = TRUE) > 0) {
x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_l <-
x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_l
# Only calculate the L value without shift since L is the only adjusted value
# x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_k <-
# x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_k
# x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_r <-
# x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_r
# x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_auc <-
# x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_auc
}
# Calculate Z score at each concentration
x_stats_interaction$zscore_l <- (x_stats_interaction$delta_l) / (x_stats_interaction$WT_sd_l)
x_stats_interaction$zscore_k <- (x_stats_interaction$delta_k) / (x_stats_interaction$WT_sd_k)
x_stats_interaction$zscore_r <- (x_stats_interaction$delta_r) / (x_stats_interaction$WT_sd_r)
x_stats_interaction$zscore_auc <- (x_stats_interaction$delta_auc) / (x_stats_interaction$WT_sd_auc)
# NA values for the next part since there's an NA or 0 at the no drug.
gene_lm_l <- NA
gene_lm_k <- NA
gene_lm_r <- NA
gene_lm_auc <- NA
gene_interaction_l <- NA
r_squared_l <- NA
gene_interaction_k <- NA
r_squared_k <- NA
gene_interaction_r <- NA
r_squared_r <- NA
gene_interaction_auc <- NA
r_squared_auc <- NA
x_stats_interaction$Raw_Shift_l <- NA
x_stats_interaction$Raw_Shift_k <- NA
x_stats_interaction$Raw_Shift_r <- NA
x_stats_interaction$Raw_Shift_auc <- NA
x_stats_interaction$z_shift_l <- NA
x_stats_interaction$z_shift_k <- NA
x_stats_interaction$z_shift_r <- NA
x_stats_interaction$z_shift_auc <- NA
interaction_scores_rf$OrfRep[interaction_scores_rf$OrfRep == gene_sel] <- x_gene_sel$OrfRep[1]
interaction_scores_rf$Gene[interaction_scores_rf$OrfRep == gene_sel] <- x_gene_sel$Gene[1]
interaction_scores_rf$Raw_Shift_l[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_l[1]
interaction_scores_rf$z_shift_l[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$z_shift_l[1]
interaction_scores_rf$lm_Score_l[interaction_scores_rf$OrfRep == gene_sel] <- gene_interaction_l
interaction_scores_rf$R_Squared_l[interaction_scores_rf$OrfRep == gene_sel] <- r_squared_l
interaction_scores_rf$Sum_z_Score_l[interaction_scores_rf$OrfRep == gene_sel] <- NA
interaction_scores_rf$avg_zscore_l[interaction_scores_rf$OrfRep == gene_sel] <- NA
interaction_scores_rf$Raw_Shift_k[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_k[1]
interaction_scores_rf$z_shift_k[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$z_shift_k[1]
interaction_scores_rf$lm_Score_k[interaction_scores_rf$OrfRep == gene_sel] <- gene_interaction_k
interaction_scores_rf$R_Squared_k[interaction_scores_rf$OrfRep == gene_sel] <- r_squared_k
interaction_scores_rf$Sum_z_Score_k[interaction_scores_rf$OrfRep == gene_sel] <- NA
interaction_scores_rf$avg_zscore_k[interaction_scores_rf$OrfRep == gene_sel] <- NA
interaction_scores_rf$Raw_Shift_r[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_r[1]
interaction_scores_rf$z_shift_r[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$z_shift_r[1]
interaction_scores_rf$lm_Score_r[interaction_scores_rf$OrfRep == gene_sel] <- gene_interaction_r
interaction_scores_rf$R_Squared_r[interaction_scores_rf$OrfRep == gene_sel] <- r_squared_r
interaction_scores_rf$Sum_z_Score_r[interaction_scores_rf$OrfRep == gene_sel] <- NA
interaction_scores_rf$avg_zscore_r[interaction_scores_rf$OrfRep == gene_sel] <- NA
interaction_scores_rf$Raw_Shift_auc[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_auc[1]
interaction_scores_rf$z_shift_auc[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$z_shift_auc[1]
interaction_scores_rf$lm_Score_auc[interaction_scores_rf$OrfRep == gene_sel] <- gene_interaction_auc
interaction_scores_rf$R_Squared_auc[interaction_scores_rf$OrfRep == gene_sel] <- r_squared_auc
interaction_scores_rf$Sum_z_Score_auc[interaction_scores_rf$OrfRep == gene_sel] <- NA
interaction_scores_rf$avg_zscore_auc[interaction_scores_rf$OrfRep == gene_sel] <- NA
}
if (i == 1) {
x_stats_interaction_all_rf <- x_stats_interaction
}
if (i > 1) {
x_stats_interaction_all_rf <- rbind(x_stats_interaction_all_rf, x_stats_interaction)
}
interaction_scores_rf$NG[interaction_scores_rf$OrfRep == gene_sel] <- sum(x_stats_interaction$NG, na.rm = TRUE)
interaction_scores_rf$DB[interaction_scores_rf$OrfRep == gene_sel] <- sum(x_stats_interaction$DB, na.rm = TRUE)
interaction_scores_rf$SM[interaction_scores_rf$OrfRep == gene_sel] <- sum(x_stats_interaction$SM, na.rm = TRUE)
# x_stats_l_int_temp <- rbind(x_stats_l_int_temp, x_stats_l_int)
}
print("Pass RF Calculation loop")
lm_sd_l <- sd(interaction_scores_rf$lm_Score_l, na.rm = TRUE)
lm_sd_k <- sd(interaction_scores_rf$lm_Score_k, na.rm = TRUE)
lm_sd_r <- sd(interaction_scores_rf$lm_Score_r, na.rm = TRUE)
lm_sd_auc <- sd(interaction_scores_rf$lm_Score_auc, na.rm = TRUE)
lm_mean_l <- mean(interaction_scores_rf$lm_Score_l, na.rm = TRUE)
lm_mean_k <- mean(interaction_scores_rf$lm_Score_k, na.rm = TRUE)
lm_mean_r <- mean(interaction_scores_rf$lm_Score_r, na.rm = TRUE)
lm_mean_auc <- mean(interaction_scores_rf$lm_Score_auc, na.rm = TRUE)
print(paste("Mean RF linear regression score L", lm_mean_l))
interaction_scores_rf$z_lm_l <- (interaction_scores_rf$lm_Score_l - lm_mean_l) / (lm_sd_l)
interaction_scores_rf$z_lm_k <- (interaction_scores_rf$lm_Score_k - lm_mean_k) / (lm_sd_k)
interaction_scores_rf$z_lm_r <- (interaction_scores_rf$lm_Score_r - lm_mean_r) / (lm_sd_r)
interaction_scores_rf$z_lm_auc <- (interaction_scores_rf$lm_Score_auc - lm_mean_auc) / (lm_sd_auc)
interaction_scores_rf <- interaction_scores_rf[order(interaction_scores_rf$z_lm_l, decreasing = TRUE), ]
interaction_scores_rf <- interaction_scores_rf[order(interaction_scores_rf$NG, decreasing = TRUE), ]
write.csv(interaction_scores_rf, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE)
for (i in 1:num_genes_rf) {
gene_sel <- unique(interaction_scores_rf$OrfRep)[i]
x_z_calculations <- x_stats_interaction_all_rf[x_stats_interaction_all_rf$OrfRep == gene_sel, ]
df_int_scores <- interaction_scores_rf[interaction_scores_rf$OrfRep == gene_sel, ]
p_rf_l[[i]] <- ggplot(x_z_calculations, aes(conc_num_factor, delta_l)) +
geom_point() +
geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
coord_cartesian(ylim = c(-65, 65)) +
geom_errorbar(aes(ymin = 0 - (2 * WT_sd_l), ymax = 0 + (2 * WT_sd_l)), alpha = 0.3) +
ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) +
annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$z_shift_l, 2))) +
scale_color_discrete(guide = FALSE) +
# annotate("text", x = 1, y = 35, label = paste("Avg Zscore =", round(df_int_scores$avg_zscore_l, 2))) +
annotate("text", x = 1, y = 25, label = paste("lm Zscore =", round(df_int_scores$z_lm_l, 2))) +
annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) +
annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) +
annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) +
scale_x_continuous(
name = unique(df$Drug[1]),
breaks = unique(x_z_calculations$conc_num_factor),
labels = unique(as.character(x_z_calculations$conc_num))
) +
scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) +
theme_publication()
p_rf_k[[i]] <- ggplot(
x_z_calculations, aes(conc_num_factor, delta_k)) +
geom_point() +
geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
coord_cartesian(ylim = c(-65, 65)) +
geom_errorbar(aes(ymin = 0 - (2 * WT_sd_k), ymax = 0 + (2 * WT_sd_k)), alpha = 0.3) +
ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) +
annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$z_shift_k, 2))) +
scale_color_discrete(guide = FALSE) +
# annotate("text", x = 1, y = 35, label = paste("Avg ZScore =", round(df_int_scores$avg_zscore_k, 2))) +
annotate("text", x = 1, y = 25, label = paste("lm ZScore =", round(df_int_scores$z_lm_l, 2))) +
annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) +
annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) +
annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) +
scale_x_continuous(
name = unique(df$Drug[1]),
breaks = unique(x_z_calculations$conc_num_factor),
labels = unique(as.character(x_z_calculations$conc_num))
) +
scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) +
theme_publication()
p_rf_r[[i]] <- ggplot(
x_z_calculations, aes(conc_num_factor, delta_r)) +
geom_point() +
geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
coord_cartesian(ylim = c(-0.65, 0.65)) +
geom_errorbar(aes(ymin = 0 - (2 * WT_sd_r), ymax = 0 + (2 * WT_sd_r)), alpha = 0.3) +
ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) +
annotate("text", x = 1, y = 0.45, label = paste("ZShift =", round(df_int_scores$z_shift_r, 2))) +
scale_color_discrete(guide = FALSE) +
# annotate("text", x = 1, y = 0.35, label = paste("Avg ZScore =", round(df_int_scores$avg_zscore_r, 2))) +
annotate("text", x = 1, y = 0.25, label = paste("lm ZScore =", round(df_int_scores$z_lm_r, 2))) +
annotate("text", x = 1, y = -0.25, label = paste("NG =", df_int_scores$NG)) +
annotate("text", x = 1, y = -0.35, label = paste("DB =", df_int_scores$DB)) +
annotate("text", x = 1, y = -0.45, label = paste("SM =", df_int_scores$SM)) +
scale_x_continuous(
name = unique(df$Drug[1]),
breaks = unique(x_z_calculations$conc_num_factor),
labels = unique(as.character(x_z_calculations$conc_num))
) +
scale_y_continuous(breaks = c(-0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6)) +
theme_publication()
p_rf_auc[[i]] <- ggplot(
x_z_calculations, aes(conc_num_factor, delta_auc)) +
geom_point() +
geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
coord_cartesian(ylim = c(-6500, 6500)) +
geom_errorbar(aes(ymin = 0 - (2 * WT_sd_auc), ymax = 0 + (2 * WT_sd_auc)), alpha = 0.3) +
ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) +
annotate("text", x = 1, y = 4500, label = paste("ZShift =", round(df_int_scores$z_shift_auc, 2))) +
scale_color_discrete(guide = FALSE) +
# annotate("text", x = 1, y = 3500, label = paste("Avg ZScore =", round(df_int_scores$avg_zscore_auc, 2))) +
annotate("text", x = 1, y = 2500, label = paste("lm ZScore =", round(df_int_scores$z_lm_auc, 2))) +
annotate("text", x = 1, y = -2500, label = paste("NG =", df_int_scores$NG)) +
annotate("text", x = 1, y = -3500, label = paste("DB =", df_int_scores$DB)) +
annotate("text", x = 1, y = -4500, label = paste("SM =", df_int_scores$SM)) +
scale_x_continuous(
name = unique(df$Drug[1]),
breaks = unique(x_z_calculations$conc_num_factor),
labels = unique(as.character(x_z_calculations$conc_num))
) +
scale_y_continuous(breaks = c(-6000, -5000, -4000, -3000, -2000, -1000, 0, 1000, 2000, 3000, 4000, 5000, 6000)) +
theme_publication()
if (i == 1) {
x_stats_interaction_all_rf_final <- x_z_calculations
}
if (i > 1) {
x_stats_interaction_all_rf_final <- rbind(x_stats_interaction_all_rf_final, x_z_calculations)
}
}
print("Pass RF ggplot loop")
write.csv(x_stats_interaction_all_rf_final, file.path(out_dir, "rf_zscore_calculations.csv"), row.names = FALSE)
# Part 5 - Get Zscores for Gene deletion strains
# Get total number of genes for the next loop
num_genes <- length(unique(df2$OrfRep))
# print(num_genes)
# Create the output data.frame containing columns for each deletion strain
interaction_scores <- unique(df2["OrfRep"])
# interaction_scores$Gene <- unique(df2$Gene)
interaction_scores$Gene <- NA
interaction_scores$Raw_Shift_l <- NA
interaction_scores$z_shift_l <- NA
interaction_scores$lm_Score_l <- NA
interaction_scores$z_lm_l <- NA
interaction_scores$R_Squared_l <- NA
interaction_scores$Sum_z_Score_l <- NA
interaction_scores$avg_zscore_l <- NA
interaction_scores$Raw_Shift_k <- NA
interaction_scores$z_shift_k <- NA
interaction_scores$lm_Score_k <- NA
interaction_scores$z_lm_k <- NA
interaction_scores$R_Squared_k <- NA
interaction_scores$Sum_z_Score_k <- NA
interaction_scores$avg_zscore_k <- NA
interaction_scores$Raw_Shift_r <- NA
interaction_scores$z_shift_r <- NA
interaction_scores$lm_Score_r <- NA
interaction_scores$z_lm_r <- NA
interaction_scores$R_Squared_r <- NA
interaction_scores$Sum_z_Score_r <- NA
interaction_scores$avg_zscore_r <- NA
interaction_scores$Raw_Shift_auc <- NA
interaction_scores$z_shift_auc <- NA
interaction_scores$lm_Score_auc <- NA
interaction_scores$z_lm_auc <- NA
interaction_scores$R_Squared_auc <- NA
interaction_scores$Sum_z_Score_auc <- NA
interaction_scores$avg_zscore_auc <- NA
interaction_scores$NG <- NA
interaction_scores$DB <- NA
interaction_scores$SM <- NA
for (i in 1:num_genes) {
# Get each deletion strain ORF
gene_sel <- unique(df2$OrfRep)[i]
# Extract only the current deletion strain and its data
x_gene_sel <- df2[df2$OrfRep == gene_sel, ]
x_stats_interaction <- ddply(
x_gene_sel,
c("OrfRep", "Gene", "conc_num", "conc_num_factor"),
summarise,
N = (length(l)),
mean_l = mean(l, na.rm = TRUE),
median_l = median(l, na.rm = TRUE),
sd_l = sd(l, na.rm = TRUE),
se_l = sd_l / sqrt(N - 1),
mean_k = mean(k, na.rm = TRUE),
median_k = median(k, na.rm = TRUE),
sd_k = sd(k, na.rm = TRUE),
se_k = sd_k / sqrt(N - 1),
mean_r = mean(r, na.rm = TRUE),
median_r = median(r, na.rm = TRUE),
sd_r = sd(r, na.rm = TRUE),
se_r = sd_r / sqrt(N - 1),
mean_auc = mean(auc, na.rm = TRUE),
median_auc = median(auc, na.rm = TRUE),
sd_auc = sd(auc, na.rm = TRUE),
se_auc = sd_auc / sqrt(N - 1),
NG = sum(NG, na.rm = TRUE),
DB = sum(DB, na.rm = TRUE),
SM = sum(SM, na.rm = TRUE)
)
# Get shift vals
# if L is 0, that means the no growth on no drug
# if L is NA at 0, that means the spot was removed due to contamination
# if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift
# otherwise calculate shift at no drug conc
if (is.na(x_stats_interaction$mean_l[1]) || x_stats_interaction$mean_l[1] == 0) {
x_stats_interaction$Raw_Shift_l <- 0
x_stats_interaction$Raw_Shift_k <- 0
x_stats_interaction$Raw_Shift_r <- 0
x_stats_interaction$Raw_Shift_auc <- 0
x_stats_interaction$z_shift_l <- 0
x_stats_interaction$z_shift_k <- 0
x_stats_interaction$z_shift_r <- 0
x_stats_interaction$z_shift_auc <- 0
} else {
x_stats_interaction$Raw_Shift_l <- x_stats_interaction$mean_l[1] - background_l
x_stats_interaction$Raw_Shift_k <- x_stats_interaction$mean_k[1] - background_k
x_stats_interaction$Raw_Shift_r <- x_stats_interaction$mean_r[1] - background_r
x_stats_interaction$Raw_Shift_auc <- x_stats_interaction$mean_auc[1] - background_auc
x_stats_interaction$z_shift_l <- x_stats_interaction$Raw_Shift_l[1] / x_stats_by_l$sd[1]
x_stats_interaction$z_shift_k <- x_stats_interaction$Raw_Shift_k[1] / x_stats_by_k$sd[1]
x_stats_interaction$z_shift_r <- x_stats_interaction$Raw_Shift_r[1] / x_stats_by_r$sd[1]
x_stats_interaction$z_shift_auc <- x_stats_interaction$Raw_Shift_auc[1] / x_stats_by_auc$sd[1]
}
# Get WT vals
x_stats_interaction$WT_l <- x_stats_by_l$mean
x_stats_interaction$WT_k <- x_stats_by_k$mean
x_stats_interaction$WT_r <- x_stats_by_r$mean
x_stats_interaction$WT_auc <- x_stats_by_auc$mean
# Get WT SD
x_stats_interaction$WT_sd_l <- x_stats_by_l$sd
x_stats_interaction$WT_sd_k <- x_stats_by_k$sd
x_stats_interaction$WT_sd_r <- x_stats_by_r$sd
x_stats_interaction$WT_sd_auc <- x_stats_by_auc$sd
# Only get scores if there's growth at no drug
if (x_stats_interaction$mean_l[1] != 0 && !is.na(x_stats_interaction$mean_l[1])) {
# Calculate expected values
x_stats_interaction$Exp_l <- x_stats_interaction$WT_l + x_stats_interaction$Raw_Shift_l
x_stats_interaction$Exp_k <- x_stats_interaction$WT_k + x_stats_interaction$Raw_Shift_k
x_stats_interaction$Exp_r <- x_stats_interaction$WT_r + x_stats_interaction$Raw_Shift_r
x_stats_interaction$Exp_auc <- x_stats_interaction$WT_auc + x_stats_interaction$Raw_Shift_auc
# Calculate normalized delta values
x_stats_interaction$delta_l <- x_stats_interaction$mean_l - x_stats_interaction$Exp_l
x_stats_interaction$delta_k <- x_stats_interaction$mean_k - x_stats_interaction$Exp_k
x_stats_interaction$delta_r <- x_stats_interaction$mean_r - x_stats_interaction$Exp_r
x_stats_interaction$delta_auc <- x_stats_interaction$mean_auc - x_stats_interaction$Exp_auc
# Disregard shift for no growth values in Z score calculation
if (sum(x_stats_interaction$NG, na.rm = TRUE) > 0) {
x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_l <-
x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_l
x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_k <-
x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_k
x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_r <-
x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_r
x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_auc <-
x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_auc
}
# Disregard shift for set to max values in Z score calculation
if (sum(x_stats_interaction$SM, na.rm = TRUE) > 0) {
x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_l <-
x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_l
# Only calculate the L value without shift since L is the only adjusted value
# x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_k <-
# x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_k
# x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_r <-
# x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_r
# x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_auc <-
# x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_auc
}
# Calculate Z score at each concentration
x_stats_interaction$zscore_l <- (x_stats_interaction$delta_l) / (x_stats_interaction$WT_sd_l)
x_stats_interaction$zscore_k <- (x_stats_interaction$delta_k) / (x_stats_interaction$WT_sd_k)
x_stats_interaction$zscore_r <- (x_stats_interaction$delta_r) / (x_stats_interaction$WT_sd_r)
x_stats_interaction$zscore_auc <- (x_stats_interaction$delta_auc) / (x_stats_interaction$WT_sd_auc)
# Get linear model
gene_lm_l <- lm(formula = delta_l ~ conc_num_factor, data = x_stats_interaction)
gene_lm_k <- lm(formula = delta_k ~ conc_num_factor, data = x_stats_interaction)
gene_lm_r <- lm(formula = delta_r ~ conc_num_factor, data = x_stats_interaction)
gene_lm_auc <- lm(formula = delta_auc ~ conc_num_factor, data = x_stats_interaction)
# Get interaction score calculated by linear model and R-squared value for the fit
gene_interaction_l <- max_conc * (gene_lm_l$coefficients[2]) + gene_lm_l$coefficients[1]
r_squared_l <- summary(gene_lm_l)$r.squared
gene_interaction_k <- max_conc * (gene_lm_k$coefficients[2]) + gene_lm_k$coefficients[1]
r_squared_k <- summary(gene_lm_k)$r.squared
gene_interaction_r <- max_conc * (gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1]
r_squared_r <- summary(gene_lm_r)$r.squared
gene_interaction_auc <- max_conc * (gene_lm_r$coefficients[2]) + gene_lm_auc$coefficients[1]
r_squared_auc <- summary(gene_lm_auc)$r.squared
# Get total of non removed values
num_non_removed_conc <- total_conc_nums - sum(x_stats_interaction$DB, na.rm = TRUE) - 1
# Report the scores
interaction_scores$OrfRep[interaction_scores$OrfRep == gene_sel] <-
as.character(x_gene_sel$OrfRep[1])
interaction_scores$Gene[interaction_scores$OrfRep == gene_sel] <-
as.character(x_gene_sel$Gene[1])
interaction_scores$Raw_Shift_l[interaction_scores$OrfRep == gene_sel] <-
x_stats_interaction$Raw_Shift_l[1]
interaction_scores$z_shift_l[interaction_scores$OrfRep == gene_sel] <-
x_stats_interaction$z_shift_l[1]
interaction_scores$lm_Score_l[interaction_scores$OrfRep == gene_sel] <-
gene_interaction_l
interaction_scores$z_lm_l[interaction_scores$OrfRep == gene_sel] <-
(gene_interaction_l - lm_mean_l) / lm_sd_l
interaction_scores$R_Squared_l[interaction_scores$OrfRep == gene_sel] <-
r_squared_l
interaction_scores$Sum_z_Score_l[interaction_scores$OrfRep == gene_sel] <-
sum(x_stats_interaction$zscore_l, na.rm = TRUE)
interaction_scores$avg_zscore_l[interaction_scores$OrfRep == gene_sel] <-
sum(x_stats_interaction$zscore_l, na.rm = TRUE) / (num_non_removed_conc)
interaction_scores$Raw_Shift_k[interaction_scores$OrfRep == gene_sel] <-
x_stats_interaction$Raw_Shift_k[1]
interaction_scores$z_shift_k[interaction_scores$OrfRep == gene_sel] <-
x_stats_interaction$z_shift_k[1]
interaction_scores$lm_Score_k[interaction_scores$OrfRep == gene_sel] <-
gene_interaction_k
interaction_scores$z_lm_k[interaction_scores$OrfRep == gene_sel] <-
(gene_interaction_k - lm_mean_k) / lm_sd_k
interaction_scores$R_Squared_k[interaction_scores$OrfRep == gene_sel] <-
r_squared_k
interaction_scores$Sum_z_Score_k[interaction_scores$OrfRep == gene_sel] <-
sum(x_stats_interaction$zscore_k, na.rm = TRUE)
interaction_scores$avg_zscore_k[interaction_scores$OrfRep == gene_sel] <-
sum(x_stats_interaction$zscore_k, na.rm = TRUE) / (num_non_removed_conc)
interaction_scores$Raw_Shift_r[interaction_scores$OrfRep == gene_sel] <-
x_stats_interaction$Raw_Shift_r[1]
interaction_scores$z_shift_r[interaction_scores$OrfRep == gene_sel] <-
x_stats_interaction$z_shift_r[1]
interaction_scores$lm_Score_r[interaction_scores$OrfRep == gene_sel] <-
gene_interaction_r
interaction_scores$z_lm_r[interaction_scores$OrfRep == gene_sel] <-
(gene_interaction_r - lm_mean_r) / lm_sd_r
interaction_scores$R_Squared_r[interaction_scores$OrfRep == gene_sel] <-
r_squared_r
interaction_scores$Sum_z_Score_r[interaction_scores$OrfRep == gene_sel] <-
sum(x_stats_interaction$zscore_r, na.rm = TRUE)
interaction_scores$avg_zscore_r[interaction_scores$OrfRep == gene_sel] <-
sum(x_stats_interaction$zscore_r, na.rm = TRUE) / (total_conc_nums - 1)
interaction_scores$Raw_Shift_auc[interaction_scores$OrfRep == gene_sel] <-
x_stats_interaction$Raw_Shift_auc[1]
interaction_scores$z_shift_auc[interaction_scores$OrfRep == gene_sel] <-
x_stats_interaction$z_shift_auc[1]
interaction_scores$lm_Score_auc[interaction_scores$OrfRep == gene_sel] <-
gene_interaction_auc
interaction_scores$z_lm_auc[interaction_scores$OrfRep == gene_sel] <-
(gene_interaction_auc - lm_mean_auc) / lm_sd_auc
interaction_scores$R_Squared_auc[interaction_scores$OrfRep == gene_sel] <-
r_squared_auc
interaction_scores$Sum_z_Score_auc[interaction_scores$OrfRep == gene_sel] <-
sum(x_stats_interaction$zscore_auc, na.rm = TRUE)
interaction_scores$avg_zscore_auc[interaction_scores$OrfRep == gene_sel] <-
sum(x_stats_interaction$zscore_auc, na.rm = TRUE) / (total_conc_nums - 1)
}
if (x_stats_interaction$mean_l[1] == 0 || is.na(x_stats_interaction$mean_l[1])) {
# Calculate expected values
x_stats_interaction$Exp_l <- x_stats_interaction$WT_l + x_stats_interaction$Raw_Shift_l
x_stats_interaction$Exp_k <- x_stats_interaction$WT_k + x_stats_interaction$Raw_Shift_k
x_stats_interaction$Exp_r <- x_stats_interaction$WT_r + x_stats_interaction$Raw_Shift_r
x_stats_interaction$Exp_auc <- x_stats_interaction$WT_auc + x_stats_interaction$Raw_Shift_auc
# Calculate normalized delta values
x_stats_interaction$delta_l <- x_stats_interaction$mean_l - x_stats_interaction$Exp_l
x_stats_interaction$delta_k <- x_stats_interaction$mean_k - x_stats_interaction$Exp_k
x_stats_interaction$delta_r <- x_stats_interaction$mean_r - x_stats_interaction$Exp_r
x_stats_interaction$delta_auc <- x_stats_interaction$mean_auc - x_stats_interaction$Exp_auc
# Disregard shift for missing values in Z score calculatiom
if (sum(x_stats_interaction$NG, na.rm = TRUE) > 0) {
x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_l <-
x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_l
x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_k <-
x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_k
x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_r <-
x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_r
x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_auc <-
x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_auc
}
# Disregard shift for set to max values in Z score calculation
if (sum(x_stats_interaction$SM, na.rm = TRUE) > 0) {
x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_l <-
x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_l
# Only calculate the L value without shift since L is the only adjusted value
# x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_k <-
# x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_k
# x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_r <-
# x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_r
# x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_auc <-
# x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_auc
}
# Calculate Z score at each concentration
x_stats_interaction$zscore_l <- (x_stats_interaction$delta_l) / (x_stats_interaction$WT_sd_l)
x_stats_interaction$zscore_k <- (x_stats_interaction$delta_k) / (x_stats_interaction$WT_sd_k)
x_stats_interaction$zscore_r <- (x_stats_interaction$delta_r) / (x_stats_interaction$WT_sd_r)
x_stats_interaction$zscore_auc <- (x_stats_interaction$delta_auc) / (x_stats_interaction$WT_sd_auc)
# NA values for the next part since there's an NA or 0 at the no drug.
gene_lm_l <- NA
gene_lm_k <- NA
gene_lm_r <- NA
gene_interaction_l <- NA
r_squared_l <- NA
gene_interaction_k <- NA
r_squared_k <- NA
gene_interaction_r <- NA
r_squared_r <- NA
x_stats_interaction$Raw_Shift_l <- NA
x_stats_interaction$Raw_Shift_k <- NA
x_stats_interaction$Raw_Shift_r <- NA
x_stats_interaction$Raw_Shift_auc <- NA
x_stats_interaction$z_shift_l <- NA
x_stats_interaction$z_shift_k <- NA
x_stats_interaction$z_shift_r <- NA
x_stats_interaction$z_shift_auc <- NA
interaction_scores$OrfRep[interaction_scores$OrfRep == gene_sel] <- as.character(x_gene_sel$OrfRep[1])
interaction_scores$Gene[interaction_scores$OrfRep == gene_sel] <- as.character(x_gene_sel$Gene[1])
interaction_scores$Raw_Shift_l[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_l[1]
interaction_scores$z_shift_l[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$z_shift_l[1]
interaction_scores$lm_Score_l[interaction_scores$OrfRep == gene_sel] <- NA
interaction_scores$z_lm_l[interaction_scores$OrfRep == gene_sel] <- NA
interaction_scores$R_Squared_l[interaction_scores$OrfRep == gene_sel] <- r_squared_l
interaction_scores$Sum_z_Score_l[interaction_scores$OrfRep == gene_sel] <- NA
interaction_scores$avg_zscore_l[interaction_scores$OrfRep == gene_sel] <- NA
interaction_scores$Raw_Shift_k[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_k[1]
interaction_scores$z_shift_k[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$z_shift_k[1]
interaction_scores$lm_Score_k[interaction_scores$OrfRep == gene_sel] <- NA
interaction_scores$z_lm_k[interaction_scores$OrfRep == gene_sel] <- NA
interaction_scores$R_Squared_k[interaction_scores$OrfRep == gene_sel] <- r_squared_k
interaction_scores$Sum_z_Score_k[interaction_scores$OrfRep == gene_sel] <- NA
interaction_scores$avg_zscore_k[interaction_scores$OrfRep == gene_sel] <- NA
interaction_scores$Raw_Shift_r[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_r[1]
interaction_scores$z_shift_r[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$z_shift_r[1]
interaction_scores$lm_Score_r[interaction_scores$OrfRep == gene_sel] <- NA
interaction_scores$z_lm_r[interaction_scores$OrfRep == gene_sel] <- NA
interaction_scores$R_Squared_r[interaction_scores$OrfRep == gene_sel] <- r_squared_r
interaction_scores$Sum_z_Score_r[interaction_scores$OrfRep == gene_sel] <- NA
interaction_scores$avg_zscore_r[interaction_scores$OrfRep == gene_sel] <- NA
interaction_scores$Raw_Shift_auc[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_auc[1]
interaction_scores$z_shift_auc[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$z_shift_auc[1]
interaction_scores$lm_Score_auc[interaction_scores$OrfRep == gene_sel] <- NA
interaction_scores$z_lm_auc[interaction_scores$OrfRep == gene_sel] <- NA
interaction_scores$R_Squared_auc[interaction_scores$OrfRep == gene_sel] <- r_squared_auc
interaction_scores$Sum_z_Score_auc[interaction_scores$OrfRep == gene_sel] <- NA
interaction_scores$avg_zscore_auc[interaction_scores$OrfRep == gene_sel] <- NA
}
if (i == 1) {
x_stats_interaction_all <- x_stats_interaction
}
if (i > 1) {
x_stats_interaction_all <- rbind(x_stats_interaction_all, x_stats_interaction)
}
interaction_scores$NG[interaction_scores$OrfRep == gene_sel] <- sum(x_stats_interaction$NG, na.rm = TRUE)
interaction_scores$DB[interaction_scores$OrfRep == gene_sel] <- sum(x_stats_interaction$DB, na.rm = TRUE)
interaction_scores$SM[interaction_scores$OrfRep == gene_sel] <- sum(x_stats_interaction$SM, na.rm = TRUE)
# x_stats_l_int_temp <- rbind(x_stats_l_int_temp, x_stats_l_int)
}
print("Pass Int Calculation loop")
interaction_scores <- interaction_scores[order(interaction_scores$z_lm_l, decreasing = TRUE), ]
interaction_scores <- interaction_scores[order(interaction_scores$NG, decreasing = TRUE), ]
df_order_by_OrfRep <- unique(interaction_scores$OrfRep)
# x_stats_interaction_all <- x_stats_interaction_all[order(x_stats_interaction_all$NG, decreasing = TRUE), ]
write.csv(interaction_scores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE)
interaction_scores_deletion_enhancers_l <-
interaction_scores[interaction_scores$avg_zscore_l >= 2, ]
interaction_scores_deletion_enhancers_k <-
interaction_scores[interaction_scores$avg_zscore_k <= -2, ]
interaction_scores_deletion_suppressors_l <-
interaction_scores[interaction_scores$avg_zscore_l <= -2, ]
interaction_scores_deletion_suppressors_k <-
interaction_scores[interaction_scores$avg_zscore_k >= 2, ]
interaction_scores_deletion_enhancers_and_suppressors_l <-
interaction_scores[interaction_scores$avg_zscore_l >= 2 | interaction_scores$avg_zscore_l <= -2, ]
interaction_scores_deletion_enhancers_and_suppressors_k <-
interaction_scores[interaction_scores$avg_zscore_k >= 2 | interaction_scores$avg_zscore_k <= -2, ]
interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_l <-
interaction_scores[interaction_scores$z_lm_l >= 2 & interaction_scores$avg_zscore_l <= -2, ]
interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_l <-
interaction_scores[interaction_scores$z_lm_l <= -2 & interaction_scores$avg_zscore_l >= 2, ]
interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_k <-
interaction_scores[interaction_scores$z_lm_k <= -2 & interaction_scores$avg_zscore_k >= 2, ]
interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_k <-
interaction_scores[interaction_scores$z_lm_k >= 2 & interaction_scores$avg_zscore_k <= -2, ]
interaction_scores_deletion_enhancers_l <-
interaction_scores_deletion_enhancers_l[
!is.na(interaction_scores_deletion_enhancers_l$OrfRep), ]
interaction_scores_deletion_enhancers_k <-
interaction_scores_deletion_enhancers_k[
!is.na(interaction_scores_deletion_enhancers_k$OrfRep), ]
interaction_scores_deletion_suppressors_l <-
interaction_scores_deletion_suppressors_l[
!is.na(interaction_scores_deletion_suppressors_l$OrfRep), ]
interaction_scores_deletion_suppressors_k <-
interaction_scores_deletion_suppressors_k[
!is.na(interaction_scores_deletion_suppressors_k$OrfRep), ]
interaction_scores_deletion_enhancers_and_suppressors_l <-
interaction_scores_deletion_enhancers_and_suppressors_l[
!is.na(interaction_scores_deletion_enhancers_and_suppressors_l$OrfRep), ]
interaction_scores_deletion_enhancers_and_suppressors_k <-
interaction_scores_deletion_enhancers_and_suppressors_k[
!is.na(interaction_scores_deletion_enhancers_and_suppressors_k$OrfRep), ]
interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_l <-
interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_l[
!is.na(interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_l$OrfRep), ]
interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_l <-
interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_l[
!is.na(interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_l$OrfRep), ]
interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_k <-
interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_k[
!is.na(interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_k$OrfRep), ]
interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_k <-
interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_k[
!is.na(interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_k$OrfRep), ]
write.csv(interaction_scores_deletion_enhancers_l,
file.path(out_dir, "zscores_interaction_deletion_enhancers_l.csv"), row.names = FALSE)
write.csv(interaction_scores_deletion_enhancers_k,
file.path(out_dir, "zscores_interaction_deletion_enhancers_k.csv"), row.names = FALSE)
write.csv(interaction_scores_deletion_suppressors_l,
file.path(out_dir, "zscores_interaction_deletion_suppressors_l.csv"), row.names = FALSE)
write.csv(interaction_scores_deletion_suppressors_k,
file.path(out_dir, "zscores_interaction_deletion_suppressors_k.csv"), row.names = FALSE)
write.csv(interaction_scores_deletion_enhancers_and_suppressors_l,
file.path(out_dir, "zscores_interaction_deletion_enhancers_and_suppressors_l.csv"), row.names = FALSE)
write.csv(interaction_scores_deletion_enhancers_and_suppressors_k,
file.path(out_dir, "zscores_interaction_deletion_enhancers_and_suppressors_k.csv"), row.names = FALSE)
write.csv(interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_l,
file.path(out_dir, "zscores_interaction_suppressors_and_lm_enhancers_l.csv"), row.names = FALSE)
write.csv(interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_l,
file.path(out_dir, "zscores_interaction_enhancers_and_lm_suppressors_l.csv"), row.names = FALSE)
write.csv(interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_k,
file.path(out_dir, "zscores_interaction_suppressors_and_lm_enhancers_k.csv"), row.names = FALSE)
write.csv(interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_k,
file.path(out_dir, "zscores_interaction_enhancers_and_lm_suppressors_k.csv"), row.names = FALSE)
# Get enhancers and suppressors for linear regression
interaction_scores_deletion_enhancers_l_lm <-
interaction_scores[interaction_scores$z_lm_l >= 2, ]
interaction_scores_deletion_enhancers_k_lm <-
interaction_scores[interaction_scores$z_lm_k <= -2, ]
interaction_scores_deletion_suppressors_l_lm <-
interaction_scores[interaction_scores$z_lm_l <= -2, ]
interaction_scores_deletion_suppressors_k_lm <-
interaction_scores[interaction_scores$z_lm_k >= 2, ]
interaction_scores_deletion_enhancers_and_suppressors_l_lm <-
interaction_scores[interaction_scores$z_lm_l >= 2 | interaction_scores$z_lm_l <= -2, ]
interaction_scores_deletion_enhancers_and_suppressors_k_lm <-
interaction_scores[interaction_scores$z_lm_k >= 2 | interaction_scores$z_lm_k <= -2, ]
interaction_scores_deletion_enhancers_l_lm <-
interaction_scores_deletion_enhancers_l_lm[
!is.na(interaction_scores_deletion_enhancers_l_lm$OrfRep), ]
interaction_scores_deletion_enhancers_k_lm <-
interaction_scores_deletion_enhancers_k_lm[
!is.na(interaction_scores_deletion_enhancers_k_lm$OrfRep), ]
interaction_scores_deletion_suppressors_l_lm <-
interaction_scores_deletion_suppressors_l_lm[
!is.na(interaction_scores_deletion_suppressors_l_lm$OrfRep), ]
interaction_scores_deletion_suppressors_k_lm <-
interaction_scores_deletion_suppressors_k_lm[
!is.na(interaction_scores_deletion_suppressors_k_lm$OrfRep), ]
interaction_scores_deletion_enhancers_and_suppressors_l_lm <-
interaction_scores_deletion_enhancers_and_suppressors_l_lm[
!is.na(interaction_scores_deletion_enhancers_and_suppressors_l_lm$OrfRep), ]
interaction_scores_deletion_enhancers_and_suppressors_k_lm <-
interaction_scores_deletion_enhancers_and_suppressors_k_lm[
!is.na(interaction_scores_deletion_enhancers_and_suppressors_k_lm$OrfRep), ]
write.csv(interaction_scores_deletion_enhancers_l_lm,
file.path(out_dir, "zscores_interaction_deletion_enhancers_l_lm.csv"), row.names = FALSE)
write.csv(interaction_scores_deletion_enhancers_k_lm,
file.path(out_dir, "zscores_interaction_deletion_enhancers_k_lm.csv"), row.names = FALSE)
write.csv(interaction_scores_deletion_suppressors_l_lm,
file.path(out_dir, "zscores_interaction_deletion_suppressors_l_lm.csv"), row.names = FALSE)
write.csv(interaction_scores_deletion_suppressors_k_lm,
file.path(out_dir, "zscores_interaction_deletion_suppressors_k_lm.csv"), row.names = FALSE)
write.csv(interaction_scores_deletion_enhancers_and_suppressors_l_lm,
file.path(out_dir, "zscores_interaction_deletion_enhancers_and_suppressors_l_lm.csv"), row.names = FALSE)
write.csv(interaction_scores_deletion_enhancers_and_suppressors_k_lm,
file.path(out_dir, "zscores_interaction_deletion_enhancers_and_suppressors_k_lm.csv"), row.names = FALSE)
# write.csv(Labels, study_info_file, row.names = FALSE)
# write.table(Labels, file.path("../Code/StudyInfo.txt"), sep = "\t", row.names = FALSE)
for (i in 1:num_genes) {
gene_sel <- unique(interaction_scores$OrfRep)[i]
x_z_calculations <- x_stats_interaction_all[x_stats_interaction_all$OrfRep == gene_sel, ]
df_int_scores <- interaction_scores[interaction_scores$OrfRep == gene_sel, ]
p_l[[i]] <- ggplot(x_z_calculations, aes(conc_num_factor, delta_l)) +
geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
coord_cartesian(ylim = c(-65, 65)) +
geom_errorbar(aes(ymin = 0 - (2 * WT_sd_l), ymax = 0 + (2 * WT_sd_l)), alpha = 0.3) +
ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) +
annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$z_shift_l, 2))) +
scale_color_discrete(guide = FALSE) +
# annotate("text", x = 1, y = 35, label = paste("Avg ZScore =", round(df_int_scores$avg_zscore_l, 2))) +
annotate("text", x = 1, y = 25, label = paste("Z lm Score =", round(df_int_scores$z_lm_l, 2))) +
annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) +
annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) +
annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) +
scale_x_continuous(
name = unique(df$Drug[1]),
breaks = unique(x_z_calculations$conc_num_factor),
labels = unique(as.character(x_z_calculations$conc_num))) +
scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) +
theme_publication()
p_k[[i]] <- ggplot(x_z_calculations, aes(conc_num_factor, delta_k)) +
geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
coord_cartesian(ylim = c(-65, 65)) +
geom_errorbar(aes(ymin = 0 - (2 * WT_sd_k), ymax = 0 + (2 * WT_sd_k)), alpha = 0.3) +
ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) +
annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$z_shift_k, 2))) +
scale_color_discrete(guide = FALSE) +
# annotate("text", x = 1, y = 35, label = paste("Avg ZScore =", round(df_int_scores$avg_zscore_k, 2))) +
annotate("text", x = 1, y = 25, label = paste("Z lm Score =", round(df_int_scores$z_lm_k, 2))) +
annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) +
annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) +
annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) +
scale_x_continuous(
name = unique(df$Drug[1]),
breaks = unique(x_z_calculations$conc_num_factor),
labels = unique(as.character(x_z_calculations$conc_num))) +
scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) +
theme_publication()
p_r[[i]] <- ggplot(x_z_calculations, aes(conc_num_factor, delta_r)) +
geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
coord_cartesian(ylim = c(-0.65, 0.65)) +
geom_errorbar(aes(ymin = 0 - (2 * WT_sd_r), ymax = 0 + (2 * WT_sd_r)), alpha = 0.3) +
ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) +
annotate("text", x = 1, y = 0.45, label = paste("ZShift =", round(df_int_scores$z_shift_r, 2))) +
scale_color_discrete(guide = FALSE) +
# annotate("text", x = 1, y = 0.35, label = paste("Avg ZScore =", round(df_int_scores$avg_zscore_r, 2))) +
annotate("text", x = 1, y = 0.25, label = paste("Z lm Score =", round(df_int_scores$z_lm_r, 2))) +
annotate("text", x = 1, y = -0.25, label = paste("NG =", df_int_scores$NG)) +
annotate("text", x = 1, y = -0.35, label = paste("DB =", df_int_scores$DB)) +
annotate("text", x = 1, y = -0.45, label = paste("SM =", df_int_scores$SM)) +
scale_x_continuous(
name = unique(df$Drug[1]),
breaks = unique(x_z_calculations$conc_num_factor),
labels = unique(as.character(x_z_calculations$conc_num))) +
scale_y_continuous(breaks = c(-0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6)) +
theme_publication()
p_auc[[i]] <- ggplot(x_z_calculations, aes(conc_num_factor, delta_auc)) +
geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
coord_cartesian(ylim = c(-6500, 6500)) +
geom_errorbar(aes(ymin = 0 - (2 * WT_sd_auc), ymax = 0 + (2 * WT_sd_auc)), alpha = 0.3) +
ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) +
annotate("text", x = 1, y = 4500, label = paste("ZShift =", round(df_int_scores$z_shift_auc, 2))) +
scale_color_discrete(guide = FALSE) +
# annotate("text", x = 1, y = 3500, label = paste("Avg ZScore =", round(df_int_scores$avg_zscore_auc, 2))) +
annotate("text", x = 1, y = 2500, label = paste("Z lm Score =", round(df_int_scores$z_lm_auc, 2))) +
annotate("text", x = 1, y = -2500, label = paste("NG =", df_int_scores$NG)) +
annotate("text", x = 1, y = -3500, label = paste("DB =", df_int_scores$DB)) +
annotate("text", x = 1, y = -4500, label = paste("SM =", df_int_scores$SM)) +
scale_x_continuous(
name = unique(df$Drug[1]),
breaks = unique(x_z_calculations$conc_num_factor),
labels = unique(as.character(x_z_calculations$conc_num))) +
scale_y_continuous(breaks = c(-6000, -5000, -4000, -3000, -2000, -1000, 0, 1000, 2000, 3000, 4000, 5000, 6000)) +
theme_publication()
if (i == 1) {
x_stats_interaction_all_final <- x_z_calculations
}
if (i > 1) {
x_stats_interaction_all_final <- rbind(x_stats_interaction_all_final, x_z_calculations)
}
}
print("Pass Int ggplot loop")
write.csv(x_stats_interaction_all_final, file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
blank <- ggplot(df2_rf) + geom_blank()
pdf(file.path(out_dir, "interaction_plots.pdf"), width = 16, height = 16, onefile = TRUE)
x_stats_df2_rf <- ddply(
df2_rf,
c("conc_num", "conc_num_factor"),
summarise,
mean_l = mean(l, na.rm = TRUE),
median_l = median(l, na.rm = TRUE),
max_l = max(l, na.rm = TRUE),
min_l = min(l, na.rm = TRUE),
sd_l = sd(l, na.rm = TRUE),
mean_k = mean(k, na.rm = TRUE),
median_k = median(k, na.rm = TRUE),
max_k = max(k, na.rm = TRUE),
min_k = min(k, na.rm = TRUE),
sd_k = sd(k, na.rm = TRUE),
mean_r = mean(r, na.rm = TRUE),
median_r = median(r, na.rm = TRUE),
max_r = max(r, na.rm = TRUE),
min_r = min(r, na.rm = TRUE),
sd_r = sd(r, na.rm = TRUE),
mean_auc = mean(auc, na.rm = TRUE),
median_auc = median(auc, na.rm = TRUE),
max_auc = max(auc, na.rm = TRUE),
min_auc = min(auc, na.rm = TRUE),
sd_auc = sd(auc, na.rm = TRUE),
NG = sum(NG, na.rm = TRUE),
DB = sum(DB, na.rm = TRUE),
SM = sum(SM, na.rm = TRUE)
)
l_stats <- ggplot(df2_rf, aes(conc_num_factor, l)) + geom_point(position = "jitter", size = 1) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar", color = "red") +
stat_summary(fun = mean, geom = "point", color = "red") +
scale_x_continuous(
name = unique(df$Drug[1]),
breaks = unique(df2_rf$conc_num_factor),
labels = as.character(unique(df2_rf$conc_num))
) +
ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim = c(0, 160)) +
annotate("text", x = -0.25, y = 10, label = "NG") +
annotate("text", x = -0.25, y = 5, label = "DB") +
annotate("text", x = -0.25, y = 0, label = "SM") +
annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 10, label = x_stats_df2_rf$NG) +
annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 5, label = x_stats_df2_rf$DB) +
annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 0, label = x_stats_df2_rf$SM) +
theme_publication()
k_stats <- ggplot(df2_rf, aes(conc_num_factor, k)) + geom_point(position = "jitter", size = 1) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar", color = "red") +
stat_summary(fun = mean, geom = "point", color = "red") +
scale_x_continuous(
name = unique(df$Drug[1]),
breaks = unique(df2_rf$conc_num_factor),
labels = as.character(unique(df2_rf$conc_num))
) +
ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim = c(-20, 160)) +
annotate("text", x = -0.25, y = -5, label = "NG") +
annotate("text", x = -0.25, y = -12.5, label = "DB") +
annotate("text", x = -0.25, y = -20, label = "SM") +
annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = -5, label = x_stats_df2_rf$NG) +
annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = -12.5, label = x_stats_df2_rf$DB) +
annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = -20, label = x_stats_df2_rf$SM) +
theme_publication()
r_stats <- ggplot(df2_rf, aes(conc_num_factor, r)) + geom_point(position = "jitter", size = 1) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar", color = "red") +
stat_summary(fun = mean, geom = "point", color = "red") +
scale_x_continuous(
name = unique(df$Drug[1]),
breaks = unique(df2_rf$conc_num_factor),
labels = as.character(unique(df2_rf$conc_num))
) +
ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim = c(0, 1)) +
annotate("text", x = -0.25, y = .9, label = "NG") +
annotate("text", x = -0.25, y = .8, label = "DB") +
annotate("text", x = -0.25, y = .7, label = "SM") +
annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = .9, label = x_stats_df2_rf$NG) +
annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = .8, label = x_stats_df2_rf$DB) +
annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = .7, label = x_stats_df2_rf$SM) +
theme_publication()
auc_stats <- ggplot(df2_rf, aes(conc_num_factor, auc)) + geom_point(position = "jitter", size = 1) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar", color = "red") +
stat_summary(fun = mean, geom = "point", color = "red") +
scale_x_continuous(
name = unique(df$Drug[1]),
breaks = unique(df2_rf$conc_num_factor),
labels = as.character(unique(df2_rf$conc_num))
) +
ggtitle(paste(s, "Scatter RF for auc with SD", sep = " ")) + coord_cartesian(ylim = c(0, 12500)) +
annotate("text", x = -0.25, y = 11000, label = "NG") +
annotate("text", x = -0.25, y = 10000, label = "DB") +
annotate("text", x = -0.25, y = 9000, label = "SM") +
annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 11000, label = x_stats_df2_rf$NG) +
annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 10000, label = x_stats_df2_rf$DB) +
annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 9000, label = x_stats_df2_rf$SM) +
theme_publication()
l_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), l)) +
geom_boxplot() +
scale_x_discrete(
name = unique(df$Drug[1]),
breaks = unique(df2_rf$conc_num_factor),
labels = as.character(unique(df2_rf$conc_num))
) +
ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) +
coord_cartesian(ylim = c(0, 160)) +
theme_publication()
k_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), k)) +
geom_boxplot() +
scale_x_discrete(
name = unique(df$Drug[1]),
breaks = unique(df2_rf$conc_num_factor),
labels = as.character(unique(df2_rf$conc_num))
) +
ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) +
coord_cartesian(ylim = c(0, 130)) +
theme_publication()
r_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), r)) +
geom_boxplot() +
scale_x_discrete(
name = unique(df$Drug[1]),
breaks = unique(df2_rf$conc_num_factor),
labels = as.character(unique(df2_rf$conc_num))
) +
ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) +
coord_cartesian(ylim = c(0, 1)) +
theme_publication()
auc_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), auc)) +
geom_boxplot() +
scale_x_discrete(
name = unique(df$Drug[1]),
breaks = unique(df2_rf$conc_num_factor),
labels = as.character(unique(df2_rf$conc_num))
) +
ggtitle(paste(s, "Scatter RF for auc with SD", sep = " ")) +
coord_cartesian(ylim = c(0, 12500)) +
theme_publication()
grid.arrange(l_stats, k_stats, r_stats, auc_stats, ncol = 2, nrow = 2)
grid.arrange(l_stats_box, k_stats_box, r_stats_box, auc_stats_box, ncol = 2, nrow = 2)
# Plot the references
# grid.arrange(p3, p3_k, p3_r, p4, p4_k, p4_r, p5, p5_k, p5_r, p6, p6_k, p6_r, ncol = 3, nrow = 4)
# grid.arrange(p5, p5_k, p5_r, p6, p6_k, p6_r, ncol = 3, nrow = 2)
# Loop for grid arrange 4x3
j <- rep(1, ((num_genes) / 3) - 1)
for (n in 1:length(j)) {
j[n + 1] <- n * 3 + 1
}
# Loop for printing each plot
num <- 0
for (m in 1:(round((num_genes) / 3) - 1)) {
num <- j[m]
grid.arrange(p_l[[num]], p_k[[num]], p_r[[num]], p_auc[[num]],
p_l[[num + 1]], p_k[[num + 1]], p_r[[num + 1]], p_auc[[num + 1]],
p_l[[num + 2]], p_k[[num + 2]], p_r[[num + 2]], p_auc[[num + 2]], ncol = 4, nrow = 3)
# grid.arrange(p_l[[364]], p_k[[364]], p_r[[364]],
# p_l[[365]], p_k[[365]], p_r[[365]], p_l[[366]], p_k[[366]], p_r[[366]], ncol = 3, nrow = 3)
# p1[[num + 3]], p_k[[num + 3]], p_r[[num + 3]], p1[[num + 4]], p_k[[num + 4]], p_r[[num + 4]]
}
if (num_genes != (num + 2)) {
total_num <- num_genes - (num + 2)
if (total_num == 5) {
grid.arrange(p_l[[num + 3]], p_k[[num + 3]], p_r[[num + 3]], p_auc[[num + 3]],
p_l[[num + 4]], p_k[[num + 4]], p_r[[num + 4]], p_auc[[num + 4]],
p_l[[num + 5]], p_k[[num + 5]], p_r[[num + 5]], p_auc[[num + 5]], ncol = 4, nrow = 3)
grid.arrange(p_l[[num + 6]], p_k[[num + 6]], p_r[[num + 6]], p_auc[[num + 6]],
p_l[[num + 7]], p_k[[num + 7]], p_r[[num + 7]], p_auc[[num + 7]],
blank, blank, blank, blank, ncol = 4, nrow = 3)
}
if (total_num == 4) {
grid.arrange(p_l[[num + 3]], p_k[[num + 3]], p_r[[num + 3]], p_auc[[num + 3]],
p_l[[num + 4]], p_k[[num + 4]], p_r[[num + 4]], p_auc[[num + 4]],
p_l[[num + 5]], p_k[[num + 5]], p_r[[num + 5]], p_auc[[num + 5]], ncol = 4, nrow = 3)
grid.arrange(p_l[[num + 6]], p_k[[num + 6]], p_r[[num + 6]], p_auc[[num + 6]],
blank, blank, blank, blank, blank, blank, blank, blank, ncol = 4, nrow = 3)
}
if (total_num == 3) {
grid.arrange(p_l[[num + 3]], p_k[[num + 3]], p_r[[num + 3]], p_auc[[num + 3]],
p_l[[num + 4]], p_k[[num + 4]], p_r[[num + 4]], p_auc[[num + 4]],
p_l[[num + 5]], p_k[[num + 5]], p_r[[num + 5]], p_auc[[num + 5]], ncol = 4, nrow = 3)
# grid.arrange(p_l[[num + 6]], p_k[[num + 6]], p_r[[num + 6]],
# p_l[[num + 7]], p_k[[num + 7]], p_r[[num + 7]], blank, blank, blank, ncol = 3, nrow = 3)
}
if (total_num == 2) {
grid.arrange(p_l[[num + 3]], p_k[[num + 3]], p_r[[num + 3]], p_auc[[num + 3]],
p_l[[num + 4]], p_k[[num + 4]], p_r[[num + 4]], p_auc[[num + 4]],
blank, blank, blank, blank, ncol = 4, nrow = 3)
# grid.arrange(p_l[[num + 6]], p_k[[num + 6]], p_r[[num + 6]],
# p_l[[num + 7]], p_k[[num + 7]], p_r[[num + 7]], blank, blank, blank, ncol = 3, nrow = 3)
}
if (total_num == 1) {
grid.arrange(p_l[[num + 3]], p_k[[num + 3]], p_r[[num + 3]], p_auc[[num + 3]],
blank, blank, blank, blank, blank, blank, blank, blank, ncol = 4, nrow = 3)
# grid.arrange(p_l[[num + 6]], p_k[[num + 6]], p_r[[num + 6]],
# p_l[[num + 7]], p_k[[num + 7]], p_r[[num + 7]], blank, blank, blank, ncol = 3, nrow = 3)
}
}
dev.off()
pdf(file.path(out_dir, "rf_interaction_plots.pdf"), width = 16, height = 16, onefile = TRUE)
x_stats_df2_rf <- ddply(
df2_rf,
c("conc_num", "conc_num_factor"),
summarise,
mean_l = mean(l, na.rm = TRUE),
median_l = median(l, na.rm = TRUE),
max_l = max(l, na.rm = TRUE),
min_l = min(l, na.rm = TRUE),
sd_l = sd(l, na.rm = TRUE),
mean_k = mean(k, na.rm = TRUE),
median_k = median(k, na.rm = TRUE),
max_k = max(k, na.rm = TRUE),
min_k = min(k, na.rm = TRUE),
sd_k = sd(k, na.rm = TRUE),
mean_r = mean(r, na.rm = TRUE),
median_r = median(r, na.rm = TRUE),
max_r = max(r, na.rm = TRUE),
min_r = min(r, na.rm = TRUE),
sd_r = sd(r, na.rm = TRUE),
mean_auc = mean(auc, na.rm = TRUE),
median_auc = median(auc, na.rm = TRUE),
max_auc = max(auc, na.rm = TRUE),
min_auc = min(auc, na.rm = TRUE),
sd_auc = sd(auc, na.rm = TRUE),
NG = sum(NG, na.rm = TRUE),
DB = sum(DB, na.rm = TRUE),
SM = sum(SM, na.rm = TRUE)
)
l_stats <- ggplot(df2_rf, aes(conc_num_factor, l)) +
geom_point(position = "jitter", size = 1) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar", color = "red"
) +
stat_summary(fun = mean, geom = "point", color = "red") +
scale_x_continuous(
name = unique(df$Drug[1]),
breaks = unique(df2_rf$conc_num_factor),
labels = as.character(unique(df2_rf$conc_num))
) +
ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) +
coord_cartesian(ylim = c(0, 130)) +
annotate("text", x = -0.25, y = 10, label = "NG") +
annotate("text", x = -0.25, y = 5, label = "DB") +
annotate("text", x = -0.25, y = 0, label = "SM") +
annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 10, label = x_stats_df2_rf$NG) +
annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 5, label = x_stats_df2_rf$DB) +
annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 0, label = x_stats_df2_rf$SM) +
theme_publication()
k_stats <- ggplot(df2_rf, aes(conc_num_factor, k)) +
geom_point(position = "jitter", size = 1) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar", color = "red"
) +
stat_summary(fun = mean, geom = "point", color = "red") +
scale_x_continuous(
name = unique(df$Drug[1]),
breaks = unique(df2_rf$conc_num_factor),
labels = as.character(unique(df2_rf$conc_num))
) +
ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) +
coord_cartesian(ylim = c(-20, 160)) +
annotate("text", x = -0.25, y = -5, label = "NG") +
annotate("text", x = -0.25, y = -12.5, label = "DB") +
annotate("text", x = -0.25, y = -20, label = "SM") +
annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = -5, label = x_stats_df2_rf$NG) +
annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = -12.5, label = x_stats_df2_rf$DB) +
annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = -20, label = x_stats_df2_rf$SM) +
theme_publication()
r_stats <- ggplot(df2_rf, aes(conc_num_factor, r)) +
geom_point(position = "jitter", size = 1) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar", color = "red"
) +
stat_summary(fun = mean, geom = "point", color = "red") +
scale_x_continuous(
name = unique(df$Drug[1]),
breaks = unique(df2_rf$conc_num_factor),
labels = as.character(unique(df2_rf$conc_num))
) +
ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) +
coord_cartesian(ylim = c(0, 1)) +
annotate("text", x = -0.25, y = .9, label = "NG") +
annotate("text", x = -0.25, y = .8, label = "DB") +
annotate("text", x = -0.25, y = .7, label = "SM") +
annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = .9, label = x_stats_df2_rf$NG) +
annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = .8, label = x_stats_df2_rf$DB) +
annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = .7, label = x_stats_df2_rf$SM) +
theme_publication()
auc_stats <- ggplot(df2_rf, aes(conc_num_factor, auc)) +
geom_point(position = "jitter", size = 1) +
stat_summary(
fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "errorbar", color = "red"
) +
stat_summary(fun = mean, geom = "point", color = "red") +
scale_x_continuous(
name = unique(df$Drug[1]),
breaks = unique(df2_rf$conc_num_factor),
labels = as.character(unique(df2_rf$conc_num))
) +
ggtitle(paste(s, "Scatter RF for auc with SD", sep = " ")) +
coord_cartesian(ylim = c(0, 12500)) +
annotate("text", x = -0.25, y = 11000, label = "NG") +
annotate("text", x = -0.25, y = 10000, label = "DB") +
annotate("text", x = -0.25, y = 9000, label = "SM") +
annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 11000, label = x_stats_df2_rf$NG) +
annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 10000, label = x_stats_df2_rf$DB) +
annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 9000, label = x_stats_df2_rf$SM) +
theme_publication()
l_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), l)) +
geom_boxplot() +
scale_x_discrete(
name = unique(df$Drug[1]),
breaks = unique(df2_rf$conc_num_factor),
labels = as.character(unique(df2_rf$conc_num))
) +
ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) +
coord_cartesian(ylim = c(0, 130)) +
theme_publication()
k_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), k)) +
geom_boxplot() +
scale_x_discrete(
name = unique(df$Drug[1]),
breaks = unique(df2_rf$conc_num_factor),
labels = as.character(unique(df2_rf$conc_num))
) +
ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) +
coord_cartesian(ylim = c(0, 160)) +
theme_publication()
r_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), r)) +
geom_boxplot() +
scale_x_discrete(
name = unique(df$Drug[1]),
breaks = unique(df2_rf$conc_num_factor),
labels = as.character(unique(df2_rf$conc_num))
) +
ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) +
coord_cartesian(ylim = c(0, 1)) +
theme_publication()
auc_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), auc)) +
geom_boxplot() +
scale_x_discrete(
name = unique(df$Drug[1]),
breaks = unique(df2_rf$conc_num_factor),
labels = as.character(unique(df2_rf$conc_num))
) +
ggtitle(paste(s, "Scatter RF for auc with SD", sep = " ")) + coord_cartesian(ylim = c(12000, 0)) +
theme_publication()
grid.arrange(l_stats, k_stats, r_stats, auc_stats, ncol = 2, nrow = 2)
grid.arrange(l_stats_box, k_stats_box, r_stats_box, auc_stats_box, ncol = 2, nrow = 2)
# Plot the references
# grid.arrange(p3, p3_k, p3_r, p4, p4_k, p4_r, p5, p5_k, p5_r, p6, p6_k, p6_r, ncol = 3, nrow = 4)
# grid.arrange(p5, p5_k, p5_r, p6, p6_k, p6_r, ncol = 3, nrow = 2)
# Loop for grid arrange 4x3
j <- rep(1, ((num_genes_rf) / 3) - 1)
for (n in 1:length(j)) {
j[n + 1] <- n * 3 + 1
}
# Loop for printing each plot
num <- 0
for (m in 1:(round((num_genes_rf) / 3) - 1)) {
num <- j[m]
grid.arrange(p_rf_l[[num]], p_rf_k[[num]], p_rf_r[[num]], p_rf_auc[[num]],
p_rf_l[[num + 1]], p_rf_k[[num + 1]], p_rf_r[[num + 1]], p_rf_auc[[num + 1]],
p_rf_l[[num + 2]], p_rf_k[[num + 2]], p_rf_r[[num + 2]], p_rf_auc[[num + 2]], ncol = 4, nrow = 3)
# grid.arrange(p_rf_l[[364]], p_rf_k[[364]], p_rf_r[[364]],
# p_rf_l[[365]], p_rf_k[[365]], p_rf_r[[365]], p_rf_l[[366]], p_rf_k[[366]], p_rf_r[[366]], ncol = 3, nrow = 3)
# p1[[num + 3]], p_rf_k[[num + 3]], p_rf_r[[num + 3]], p1[[num + 4]], p_rf_k[[num + 4]], p_rf_r[[num + 4]]
}
if (num_genes_rf != (num + 2)) {
total_num <- num_genes_rf - (num + 2)
if (total_num == 5) {
grid.arrange(p_rf_l[[num + 3]], p_rf_k[[num + 3]], p_rf_r[[num + 3]], p_rf_auc[[num + 3]],
p_rf_l[[num + 4]], p_rf_k[[num + 4]], p_rf_r[[num + 4]], p_rf_auc[[num + 4]],
p_rf_l[[num + 5]], p_rf_k[[num + 5]], p_rf_r[[num + 5]], p_rf_auc[[num + 5]], ncol = 4, nrow = 3
)
grid.arrange(p_rf_l[[num + 6]], p_rf_k[[num + 6]], p_rf_r[[num + 6]], p_rf_auc[[num + 6]],
p_rf_l[[num + 7]], p_rf_k[[num + 7]], p_rf_r[[num + 7]], p_rf_auc[[num + 7]],
blank, blank, blank, blank, ncol = 4, nrow = 3
)
}
if (total_num == 4) {
grid.arrange(p_rf_l[[num + 3]], p_rf_k[[num + 3]], p_rf_r[[num + 3]], p_rf_auc[[num + 3]],
p_rf_l[[num + 4]], p_rf_k[[num + 4]], p_rf_r[[num + 4]], p_rf_auc[[num + 4]],
p_rf_l[[num + 5]], p_rf_k[[num + 5]], p_rf_r[[num + 5]], p_rf_auc[[num + 5]], ncol = 4, nrow = 3
)
grid.arrange(p_rf_l[[num + 6]], p_rf_k[[num + 6]], p_rf_r[[num + 6]], p_rf_auc[[num + 6]],
blank, blank, blank, blank, blank, blank, blank, blank, ncol = 4, nrow = 3
)
}
if (total_num == 3) {
grid.arrange(p_rf_l[[num + 3]], p_rf_k[[num + 3]], p_rf_r[[num + 3]], p_rf_auc[[num + 3]],
p_rf_l[[num + 4]], p_rf_k[[num + 4]], p_rf_r[[num + 4]], p_rf_auc[[num + 4]],
p_rf_l[[num + 5]], p_rf_k[[num + 5]], p_rf_r[[num + 5]], p_rf_auc[[num + 5]], ncol = 4, nrow = 3
)
# grid.arrange(p_rf_l[[num + 6]], p_rf_k[[num + 6]], p_rf_r[[num + 6]],
# p_rf_l[[num + 7]], p_rf_k[[num + 7]], p_rf_r[[num + 7]], blank, blank, blank, ncol = 3, nrow = 3)
}
if (total_num == 2) {
grid.arrange(p_rf_l[[num + 3]], p_rf_k[[num + 3]], p_rf_r[[num + 3]], p_rf_auc[[num + 3]],
p_rf_l[[num + 4]], p_rf_k[[num + 4]], p_rf_r[[num + 4]], p_rf_auc[[num + 4]],
blank, blank, blank, blank, ncol = 4, nrow = 3
)
# grid.arrange(p_rf_l[[num + 6]], p_rf_k[[num + 6]], p_rf_r[[num + 6]],
# p_rf_l[[num + 7]], p_rf_k[[num + 7]], p_rf_r[[num + 7]], blank, blank, blank, ncol = 3, nrow = 3)
}
if (total_num == 1) {
grid.arrange(p_rf_l[[num + 3]], p_rf_k[[num + 3]], p_rf_r[[num + 3]], p_rf_auc[[num + 3]],
blank, blank, blank, blank, blank, blank, blank, blank, ncol = 4, nrow = 3
)
# grid.arrange(p_rf_l[[num + 6]], p_rf_k[[num + 6]], p_rf_r[[num + 6]],
# p_rf_l[[num + 7]], p_rf_k[[num + 7]], p_rf_r[[num + 7]], blank, blank, blank, ncol = 3, nrow = 3)
}
}
dev.off()
# Print rank plots for L and k gene interactions
interaction_scores_adjust_missing <- interaction_scores
interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$avg_zscore_l), ]$avg_zscore_l <- 0.001
interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$avg_zscore_k), ]$avg_zscore_k <- 0.001
interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$avg_zscore_r), ]$avg_zscore_r <- 0.001
interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$avg_zscore_auc), ]$avg_zscore_auc <- 0.001
interaction_scores_adjust_missing$l_rank <- NA
interaction_scores_adjust_missing$k_rank <- NA
interaction_scores_adjust_missing$r_rank <- NA
interaction_scores_adjust_missing$auc_rank <- NA
interaction_scores_adjust_missing$l_rank <- rank(interaction_scores_adjust_missing$avg_zscore_l)
interaction_scores_adjust_missing$k_rank <- rank(interaction_scores_adjust_missing$avg_zscore_k)
interaction_scores_adjust_missing$r_rank <- rank(interaction_scores_adjust_missing$avg_zscore_r)
interaction_scores_adjust_missing$auc_rank <- rank(interaction_scores_adjust_missing$avg_zscore_auc)
interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$z_lm_l), ]$z_lm_l <- 0.001
interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$z_lm_k), ]$z_lm_k <- 0.001
interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$z_lm_r), ]$z_lm_r <- 0.001
interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$z_lm_auc), ]$z_lm_auc <- 0.001
interaction_scores_adjust_missing$l_rank_lm <- NA
interaction_scores_adjust_missing$k_rank_lm <- NA
interaction_scores_adjust_missing$r_rank_lm <- NA
interaction_scores_adjust_missing$auc_rank_lm <- NA
interaction_scores_adjust_missing$l_rank_lm <- rank(interaction_scores_adjust_missing$z_lm_l)
interaction_scores_adjust_missing$k_rank_lm <- rank(interaction_scores_adjust_missing$z_lm_k)
interaction_scores_adjust_missing$r_rank_lm <- rank(interaction_scores_adjust_missing$z_lm_r)
interaction_scores_adjust_missing$auc_rank_lm <- rank(interaction_scores_adjust_missing$z_lm_auc)
# Rank plots
rank_l_1sd <- ggplot(interaction_scores_adjust_missing, aes(l_rank, avg_zscore_l)) +
ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Enhancers =",
dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_l >= 1, ])[1])) +
annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Suppressors =",
dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_l <= -1, ])[1])) +
theme_publication()
rank_l_2sd <- ggplot(interaction_scores_adjust_missing, aes(l_rank, avg_zscore_l)) +
ggtitle("Average Z score vs. Rank for L above 2sd") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Enhancers =",
dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_l >= 2, ])[1])) +
annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Suppressors =",
dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_l <= -2, ])[1])) +
theme_publication()
rank_l_3sd <- ggplot(interaction_scores_adjust_missing, aes(l_rank, avg_zscore_l)) +
ggtitle("Average Z score vs. Rank for L above 3sd") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Enhancers =",
dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_l >= 3, ])[1])) +
annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Suppressors =",
dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_l <= -3, ])[1])) +
theme_publication()
rank_k_1sd <- ggplot(interaction_scores_adjust_missing, aes(k_rank, avg_zscore_k)) +
ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Enhancers =",
dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_k <= -1, ])[1])) +
annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Suppressors =",
dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_k >= 1, ])[1])) +
theme_publication()
rank_k_2sd <- ggplot(interaction_scores_adjust_missing, aes(k_rank, avg_zscore_k)) +
ggtitle("Average Z score vs. Rank for K above 2sd") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Enhancers =",
dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_k <= -2, ])[1])) +
annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Suppressors =",
dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_k >= 2, ])[1])) +
theme_publication()
rank_k_3sd <- ggplot(interaction_scores_adjust_missing, aes(k_rank, avg_zscore_k)) +
ggtitle("Average Z score vs. Rank for K above 3sd") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Enhancers =",
dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_k <= -3, ])[1])) +
annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Suppressors =",
dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_k >= 3, ])[1])) +
theme_publication()
rank_l_1sd_notext <- ggplot(interaction_scores_adjust_missing, aes(l_rank, avg_zscore_l)) +
ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
rank_l_2sd_notext <- ggplot(interaction_scores_adjust_missing, aes(l_rank, avg_zscore_l)) +
ggtitle("Average Z score vs. Rank for L above 2sd") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
rank_l_3sd_notext <- ggplot(interaction_scores_adjust_missing, aes(l_rank, avg_zscore_l)) +
ggtitle("Average Z score vs. Rank for L above 3sd") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
rank_k_1sd_notext <- ggplot(interaction_scores_adjust_missing, aes(k_rank, avg_zscore_k)) +
ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
rank_k_2sd_notext <- ggplot(interaction_scores_adjust_missing, aes(k_rank, avg_zscore_k)) +
ggtitle("Average Z score vs. Rank for K above 2sd") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
rank_k_3sd_notext <- ggplot(interaction_scores_adjust_missing, aes(k_rank, avg_zscore_k)) +
ggtitle("Average Z score vs. Rank for K above 3sd") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
pdf(file.path(out_dir, "rank_plots.pdf"), width = 18, height = 12, onefile = TRUE)
grid.arrange(
rank_l_1sd,
rank_l_2sd,
rank_l_3sd,
rank_k_1sd,
rank_k_2sd,
rank_k_3sd,
ncol = 3,
nrow = 2
)
grid.arrange(
rank_l_1sd_notext,
rank_l_2sd_notext,
rank_l_3sd_notext,
rank_k_1sd_notext,
rank_k_2sd_notext,
rank_k_3sd_notext,
ncol = 3,
nrow = 2
)
dev.off()
rank_l_1sd_lm <- ggplot(interaction_scores_adjust_missing, aes(l_rank_lm, z_lm_l)) +
ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Enhancers =",
dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_l >= 1, ])[1])) +
annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Suppressors =",
dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_l <= -1, ])[1])) +
theme_publication()
rank_l_2sd_lm <- ggplot(interaction_scores_adjust_missing, aes(l_rank_lm, z_lm_l)) +
ggtitle("Interaction Z score vs. Rank for L above 2sd") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Enhancers =",
dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_l >= 2, ])[1])) +
annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Suppressors =",
dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_l <= -2, ])[1])) +
theme_publication()
rank_l_3sd_lm <- ggplot(interaction_scores_adjust_missing, aes(l_rank_lm, z_lm_l)) +
ggtitle("Interaction Z score vs. Rank for L above 3sd") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Enhancers =",
dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_l >= 3, ])[1])) +
annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Suppressors =",
dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_l <= -3, ])[1])) +
theme_publication()
rank_k_1sd_lm <- ggplot(interaction_scores_adjust_missing, aes(k_rank_lm, z_lm_k)) +
ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Enhancers =",
dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_k <= -1, ])[1])) +
annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Suppressors =",
dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_k >= 1, ])[1])) +
theme_publication()
rank_k_2sd_lm <- ggplot(interaction_scores_adjust_missing, aes(k_rank_lm, z_lm_k)) +
ggtitle("Interaction Z score vs. Rank for K above 2sd") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Enhancers =",
dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_k <= -2, ])[1])) +
annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Suppressors =",
dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_k >= 2, ])[1])) +
theme_publication()
rank_k_3sd_lm <- ggplot(interaction_scores_adjust_missing, aes(k_rank_lm, z_lm_k)) +
ggtitle("Interaction Z score vs. Rank for K above 3sd") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Enhancers =",
dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_k <= -3, ])[1])) +
annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Suppressors =",
dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_k >= 3, ])[1])) +
theme_publication()
rank_l_1sd_notext_lm <- ggplot(interaction_scores_adjust_missing, aes(l_rank_lm, z_lm_l)) +
ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
rank_l_2sd_notext_lm <- ggplot(interaction_scores_adjust_missing, aes(l_rank_lm, z_lm_l)) +
ggtitle("Interaction Z score vs. Rank for L above 2sd") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
rank_l_3sd_notext_lm <- ggplot(interaction_scores_adjust_missing, aes(l_rank_lm, z_lm_l)) +
ggtitle("Interaction Z score vs. Rank for L above 3sd") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
rank_k_1sd_notext_lm <- ggplot(interaction_scores_adjust_missing, aes(k_rank_lm, z_lm_k)) +
ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
rank_k_2sd_notext_lm <- ggplot(interaction_scores_adjust_missing, aes(k_rank_lm, z_lm_k)) +
ggtitle("Interaction Z score vs. Rank for K above 2sd") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
rank_k_3sd_notext_lm <- ggplot(interaction_scores_adjust_missing, aes(k_rank_lm, z_lm_k)) +
ggtitle("Interaction Z score vs. Rank for K above 3sd") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
pdf(file.path(out_dir, "rank_plots_lm.pdf"), width = 18, height = 12, onefile = TRUE)
grid.arrange(
rank_l_1sd_lm,
rank_l_2sd_lm,
rank_l_3sd_lm,
rank_k_1sd_lm,
rank_k_2sd_lm,
rank_k_3sd_lm,
ncol = 3, nrow = 2
)
grid.arrange(
rank_l_1sd_notext_lm,
rank_l_2sd_notext_lm,
rank_l_3sd_notext_lm,
rank_k_1sd_notext_lm,
rank_k_2sd_notext_lm,
rank_k_3sd_notext_lm,
ncol = 3, nrow = 2
)
dev.off()
df_na_rm <- interaction_scores[!is.na(interaction_scores$z_lm_l) | !is.na(interaction_scores$avg_zscore_l), ]
# Find overlaps
df_na_rm$Overlap <- "No Effect"
try(df_na_rm[df_na_rm$z_lm_l >= 2 & df_na_rm$avg_zscore_l >= 2, ]$Overlap <- "Deletion Enhancer Both")
try(df_na_rm[df_na_rm$z_lm_l <= -2 & df_na_rm$avg_zscore_l <= -2, ]$Overlap <- "Deletion Suppressor Both")
try(df_na_rm[df_na_rm$z_lm_l >= 2 & df_na_rm$avg_zscore_l <= 2, ]$Overlap <- "Deletion Enhancer lm only")
try(df_na_rm[df_na_rm$z_lm_l <= 2 & df_na_rm$avg_zscore_l >= 2, ]$Overlap <- "Deletion Enhancer Avg Zscore only")
try(df_na_rm[df_na_rm$z_lm_l <= -2 & df_na_rm$avg_zscore_l >= -2, ]$Overlap <- "Deletion Suppressor lm only")
try(df_na_rm[df_na_rm$z_lm_l >= -2 & df_na_rm$avg_zscore_l <= -2, ]$Overlap <- "Deletion Suppressor Avg Zscore only")
try(df_na_rm[df_na_rm$z_lm_l >= 2 & df_na_rm$avg_zscore_l <= -2, ]$Overlap <- "Deletion Enhancer lm, Deletion Suppressor Avg Z score")
try(df_na_rm[df_na_rm$z_lm_l <= -2 & df_na_rm$avg_zscore_l >= 2, ]$Overlap <- "Deletion Suppressor lm, Deletion Enhancer Avg Z score")
# Get the linear model info and the r-squared value for all CPPs in results 1 vs results 2
get_lm_l <- lm(df_na_rm$z_lm_l ~ df_na_rm$avg_zscore_l)
l_lm <- summary(get_lm_l)
get_lm_k <- lm(df_na_rm$z_lm_k ~ df_na_rm$avg_zscore_k)
k_lm <- summary(get_lm_k)
get_lm_r <- lm(df_na_rm$z_lm_r ~ df_na_rm$avg_zscore_r)
r_lm <- summary(get_lm_r)
get_lm_auc <- lm(df_na_rm$z_lm_auc ~ df_na_rm$avg_zscore_auc)
auc_lm <- summary(get_lm_auc)
pdf(file.path(out_dir, "avg_zscore_vs_lm_na_rm.pdf"), width = 16, height = 12, onefile = TRUE)
print(ggplot(df_na_rm, aes(avg_zscore_l, z_lm_l)) +
geom_point(aes(color = Overlap), shape = 3) +
geom_smooth(method = "lm", color = 1) +
ggtitle("Avg Zscore vs lm L") +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm$r.squared, 2))) + theme_publication_legend_right())
print(ggplot(df_na_rm, aes(avg_zscore_k, z_lm_k)) +
geom_point(aes(color = Overlap), shape = 3) +
geom_smooth(method = "lm", color = 1) +
ggtitle("Avg Zscore vs lm K") +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(k_lm$r.squared, 2))) + theme_publication_legend_right())
print(ggplot(df_na_rm, aes(avg_zscore_r, z_lm_r)) +
geom_point(aes(color = Overlap), shape = 3) +
geom_smooth(method = "lm", color = 1) +
ggtitle("Avg Zscore vs lm r") +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(r_lm$r.squared, 2))) + theme_publication_legend_right())
print(ggplot(df_na_rm, aes(avg_zscore_auc, z_lm_auc)) +
geom_point(aes(color = Overlap), shape = 3) +
geom_smooth(method = "lm", color = 1) +
ggtitle("Avg Zscore vs lm AUC") +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(auc_lm$r.squared, 2))) + theme_publication_legend_right())
dev.off()
lm_v_zscore_l <- ggplot(df_na_rm, aes(avg_zscore_l, z_lm_l, ORF = OrfRep, Gene = Gene, NG = NG, SM = SM, DB = DB)) +
geom_point(aes(color = Overlap), shape = 3) +
geom_smooth(method = "lm", color = 1) + ggtitle("Avg Zscore vs lm L") +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm$r.squared, 2))) + theme_publication_legend_right()
pgg <- ggplotly(lm_v_zscore_l)
plotly_path <- file.path(out_dir, "avg_zscore_vs_lm_na_rm.html")
saveWidget(pgg, file = plotly_path, selfcontained = TRUE)
df_na_rm$l_rank <- rank(df_na_rm$avg_zscore_l)
df_na_rm$k_rank <- rank(df_na_rm$avg_zscore_k)
df_na_rm$r_rank <- rank(df_na_rm$avg_zscore_r)
df_na_rm$auc_rank <- rank(df_na_rm$avg_zscore_auc)
df_na_rm$l_rank_lm <- rank(df_na_rm$z_lm_l)
df_na_rm$k_rank_lm <- rank(df_na_rm$z_lm_k)
df_na_rm$r_rank_lm <- rank(df_na_rm$z_lm_r)
df_na_rm$auc_rank_lm <- rank(df_na_rm$z_lm_auc)
# Get the linear model info and the r-squared value for all CPPs in results 1 vs results 2
get_lm_l2 <- lm(df_na_rm$l_rank_lm ~ df_na_rm$l_rank)
l_lm2 <- summary(get_lm_l2)
get_lm_k2 <- lm(df_na_rm$k_rank_lm ~ df_na_rm$k_rank)
k_lm2 <- summary(get_lm_k2)
get_lm_r2 <- lm(df_na_rm$r_rank_lm ~ df_na_rm$r_rank)
r_lm2 <- summary(get_lm_r2)
get_lm_auc2 <- lm(df_na_rm$auc_rank_lm ~ df_na_rm$auc_rank)
auc_lm2 <- summary(get_lm_auc2)
num_genes_na_rm2 <- (dim(df_na_rm)[1]) / 2
pdf(file.path(out_dir, "avg_zscore_vs_lm_ranked_na_rm.pdf"), width = 16, height = 12, onefile = TRUE)
print(
ggplot(df_na_rm, aes(l_rank, l_rank_lm)) +
geom_point(aes(color = Overlap), shape = 3) +
geom_smooth(method = "lm", color = 1) +
ggtitle("Rank Avg Zscore vs lm L") +
annotate("text", x = num_genes_na_rm2, y = num_genes_na_rm2, label = paste("R-squared = ", round(l_lm2$r.squared, 2))) +
theme_publication_legend_right()
)
print(
ggplot(df_na_rm, aes(k_rank, k_rank_lm)) +
geom_point(aes(color = Overlap), shape = 3) +
geom_smooth(method = "lm", color = 1) +
ggtitle("Rank Avg Zscore vs lm K") +
annotate("text", x = num_genes_na_rm2, y = num_genes_na_rm2, label = paste("R-squared = ", round(k_lm2$r.squared, 2))) +
theme_publication_legend_right()
)
print(
ggplot(df_na_rm, aes(r_rank, r_rank_lm)) +
geom_point(aes(color = Overlap), shape = 3) +
geom_smooth(method = "lm", color = 1) +
ggtitle("Rank Avg Zscore vs lm r") +
annotate("text", x = num_genes_na_rm2, y = num_genes_na_rm2, label = paste("R-squared = ", round(r_lm2$r.squared, 2))) +
theme_publication_legend_right()
)
print(
ggplot(df_na_rm, aes(auc_rank, auc_rank_lm)) +
geom_point(aes(color = Overlap), shape = 3) +
geom_smooth(method = "lm", color = 1) +
ggtitle("Rank of Avg Zscore vs lm AUC") +
annotate("text", x = num_genes_na_rm2, y = num_genes_na_rm2, label = paste("R-squared = ", round(auc_lm2$r.squared, 2))) +
theme_publication_legend_right()
)
dev.off()
rank_l_1sd <- ggplot(df_na_rm, aes(l_rank, avg_zscore_l)) +
ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$avg_zscore_l >= 1, ])[1])) +
annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$avg_zscore_l <= -1, ])[1])) +
theme_publication()
rank_l_2sd <- ggplot(df_na_rm, aes(l_rank, avg_zscore_l)) +
ggtitle("Average Z score vs. Rank for L above 2sd") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$avg_zscore_l >= 2, ])[1])) +
annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$avg_zscore_l <= -2, ])[1])) +
theme_publication()
rank_l_3sd <- ggplot(df_na_rm, aes(l_rank, avg_zscore_l)) +
ggtitle("Average Z score vs. Rank for L above 3sd") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$avg_zscore_l >= 3, ])[1])) +
annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$avg_zscore_l <= -3, ])[1])) +
theme_publication()
rank_k_1sd <- ggplot(df_na_rm, aes(k_rank, avg_zscore_k)) +
ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$avg_zscore_k <= -1, ])[1])) +
annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$avg_zscore_k >= 1, ])[1])) +
theme_publication()
rank_k_2sd <- ggplot(df_na_rm, aes(k_rank, avg_zscore_k)) +
ggtitle("Average Z score vs. Rank for K above 2sd") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$avg_zscore_k <= -2, ])[1])) +
annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$avg_zscore_k >= 2, ])[1])) +
theme_publication()
rank_k_3sd <- ggplot(df_na_rm, aes(k_rank, avg_zscore_k)) +
ggtitle("Average Z score vs. Rank for K above 3sd") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$avg_zscore_k <= -3, ])[1])) +
annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$avg_zscore_k >= 3, ])[1])) +
theme_publication()
rank_l_1sd_notext <- ggplot(df_na_rm, aes(l_rank, avg_zscore_l)) +
ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
rank_l_2sd_notext <- ggplot(df_na_rm, aes(l_rank, avg_zscore_l)) +
ggtitle("Average Z score vs. Rank for L above 2sd") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
rank_l_3sd_notext <- ggplot(df_na_rm, aes(l_rank, avg_zscore_l)) +
ggtitle("Average Z score vs. Rank for L above 3sd") + xlab("Rank") + ylab("Avg Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
rank_k_1sd_notext <- ggplot(df_na_rm, aes(k_rank, avg_zscore_k)) +
ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
rank_k_2sd_notext <- ggplot(df_na_rm, aes(k_rank, avg_zscore_k)) +
ggtitle("Average Z score vs. Rank for K above 2sd") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
rank_k_3sd_notext <- ggplot(df_na_rm, aes(k_rank, avg_zscore_k)) +
ggtitle("Average Z score vs. Rank for K above 3sd") + xlab("Rank") + ylab("Avg Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
pdf(file.path(out_dir, "rank_plots_narm.pdf"), width = 18, height = 12, onefile = TRUE)
grid.arrange(
rank_l_1sd,
rank_l_2sd,
rank_l_3sd,
rank_k_1sd,
rank_k_2sd,
rank_k_3sd,
ncol = 3, nrow = 2
)
grid.arrange(
rank_l_1sd_notext,
rank_l_2sd_notext,
rank_l_3sd_notext,
rank_k_1sd_notext,
rank_k_2sd_notext,
rank_k_3sd_notext,
ncol = 3, nrow = 2
)
dev.off()
rank_l_1sd_lm <- ggplot(df_na_rm, aes(l_rank_lm, z_lm_l)) +
ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$z_lm_l >= 1, ])[1])) +
annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$z_lm_l <= -1, ])[1])) +
theme_publication()
rank_l_2sd_lm <- ggplot(df_na_rm, aes(l_rank_lm, z_lm_l)) +
ggtitle("Interaction Z score vs. Rank for L above 2sd") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$z_lm_l >= 2, ])[1])) +
annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$z_lm_l <= -2, ])[1])) +
theme_publication()
rank_l_3sd_lm <- ggplot(df_na_rm, aes(l_rank_lm, z_lm_l)) +
ggtitle("Interaction Z score vs. Rank for L above 3sd") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$z_lm_l >= 3, ])[1])) +
annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$z_lm_l <= -3, ])[1])) +
theme_publication()
rank_k_1sd_lm <- ggplot(df_na_rm, aes(k_rank_lm, z_lm_k)) +
ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$z_lm_k <= -1, ])[1])) +
annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$z_lm_k >= 1, ])[1])) +
theme_publication()
rank_k_2sd_lm <- ggplot(df_na_rm, aes(k_rank_lm, z_lm_k)) +
ggtitle("Interaction Z score vs. Rank for K above 2sd") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$z_lm_k <= -2, ])[1])) +
annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$z_lm_k >= 2, ])[1])) +
theme_publication()
rank_k_3sd_lm <- ggplot(df_na_rm, aes(k_rank_lm, z_lm_k)) +
ggtitle("Interaction Z score vs. Rank for K above 3sd") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$z_lm_k <= -3, ])[1])) +
annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$z_lm_k >= 3, ])[1])) +
theme_publication()
rank_l_1sd_notext_lm <- ggplot(df_na_rm, aes(l_rank_lm, z_lm_l)) +
ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
rank_l_2sd_notext_lm <- ggplot(df_na_rm, aes(l_rank_lm, z_lm_l)) +
ggtitle("Interaction Z score vs. Rank for L above 2sd") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
rank_l_3sd_notext_lm <- ggplot(df_na_rm, aes(l_rank_lm, z_lm_l)) +
ggtitle("Interaction Z score vs. Rank for L above 3sd") + xlab("Rank") + ylab("Int Z score L") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
rank_k_1sd_notext_lm <- ggplot(df_na_rm, aes(k_rank_lm, z_lm_k)) +
ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
rank_k_2sd_notext_lm <- ggplot(df_na_rm, aes(k_rank_lm, z_lm_k)) +
ggtitle("Interaction Z score vs. Rank for K above 2sd") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
rank_k_3sd_notext_lm <- ggplot(df_na_rm, aes(k_rank_lm, z_lm_k)) +
ggtitle("Interaction Z score vs. Rank for K above 3sd") + xlab("Rank") + ylab("Int Z score K") +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
pdf(file.path(out_dir, "rank_plots_lm_narm.pdf"), width = 18, height = 12, onefile = TRUE)
grid.arrange(
rank_l_1sd_lm,
rank_l_2sd_lm,
rank_l_3sd_lm,
rank_k_1sd_lm,
rank_k_2sd_lm,
rank_k_3sd_lm,
ncol = 3, nrow = 2
)
grid.arrange(
rank_l_1sd_notext_lm,
rank_l_2sd_notext_lm,
rank_l_3sd_notext_lm,
rank_k_1sd_notext_lm,
rank_k_2sd_notext_lm,
rank_k_3sd_notext_lm,
ncol = 3, nrow = 2
)
dev.off()
}
# Get the linear model info and the r-squared value for all CPPs in results 1 vs results 2
get_lm_1 <- lm(df_na_rm$z_lm_k ~ df_na_rm$z_lm_l)
l_lm_1 <- summary(get_lm_1)
get_lm_2 <- lm(df_na_rm$z_lm_r ~ df_na_rm$z_lm_l)
l_lm_2 <- summary(get_lm_2)
get_lm_3 <- lm(df_na_rm$z_lm_auc ~ df_na_rm$z_lm_l)
l_lm_3 <- summary(get_lm_3)
get_lm_4 <- lm(df_na_rm$z_lm_r ~ df_na_rm$z_lm_k)
l_lm_4 <- summary(get_lm_4)
get_lm_5 <- lm(df_na_rm$z_lm_auc ~ df_na_rm$z_lm_k)
l_lm_5 <- summary(get_lm_5)
get_lm_6 <- lm(df_na_rm$z_lm_auc ~ df_na_rm$z_lm_r)
l_lm_6 <- summary(get_lm_6)
pdf(file.path(out_dir, "correlation_cpps.pdf"), width = 10, height = 7, onefile = TRUE)
ggplot(df_na_rm, aes(z_lm_l, z_lm_k)) +
geom_point(shape = 3, color = "gray70") +
geom_smooth(method = "lm", color = "tomato3") +
ggtitle("Interaction L vs. Interaction K") +
xlab("z-score L") +
ylab("z-score K") +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_1$r.squared, 3))) +
theme_publication_legend_right() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(size = 16),
axis.title.x = element_text(size = 18),
axis.text.y = element_text(size = 16),
axis.title.y = element_text(size = 18)
)
ggplot(df_na_rm, aes(z_lm_l, z_lm_r)) +
geom_point(shape = 3, color = "gray70") +
geom_smooth(method = "lm", color = "tomato3") +
ggtitle("Interaction L vs. Interaction r") +
xlab("z-score L") +
ylab("z-score r") +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_2$r.squared, 3))) +
theme_publication_legend_right() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(size = 16),
axis.title.x = element_text(size = 18),
axis.text.y = element_text(size = 16),
axis.title.y = element_text(size = 18)
)
ggplot(df_na_rm, aes(z_lm_l, z_lm_auc)) +
geom_point(shape = 3, color = "gray70") +
geom_smooth(method = "lm", color = "tomato3") +
ggtitle("Interaction L vs. Interaction AUC") +
xlab("z-score L") +
ylab("z-score AUC") +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_3$r.squared, 3))) +
theme_publication_legend_right() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(size = 16),
axis.title.x = element_text(size = 18),
axis.text.y = element_text(size = 16),
axis.title.y = element_text(size = 18)
)
ggplot(df_na_rm, aes(z_lm_k, z_lm_r)) +
geom_point(shape = 3, color = "gray70") +
geom_smooth(method = "lm", color = "tomato3") +
ggtitle("Interaction K vs. Interaction r") +
xlab("z-score K") +
ylab("z-score r") +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_4$r.squared, 3))) +
theme_publication_legend_right() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(size = 16),
axis.title.x = element_text(size = 18),
axis.text.y = element_text(size = 16),
axis.title.y = element_text(size = 18)
)
ggplot(df_na_rm, aes(z_lm_k, z_lm_auc)) +
geom_point(shape = 3, color = "gray70") +
geom_smooth(method = "lm", color = "tomato3") +
ggtitle("Interaction K vs. Interaction AUC") +
xlab("z-score K") +
ylab("z-score AUC") +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_5$r.squared, 3))) +
theme_publication_legend_right() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(size = 16),
axis.title.x = element_text(size = 18),
axis.text.y = element_text(size = 16),
axis.title.y = element_text(size = 18)
)
ggplot(df_na_rm, aes(z_lm_r, z_lm_auc)) +
geom_point(shape = 3, color = "gray70") +
geom_smooth(method = "lm", color = "tomato3") +
ggtitle("Interaction r vs. Interaction AUC") +
xlab("z-score r") +
ylab("z-score AUC") +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_6$r.squared, 3))) +
theme_publication_legend_right() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(size = 16),
axis.title.x = element_text(size = 18),
axis.text.y = element_text(size = 16),
axis.title.y = element_text(size = 18))
interaction_scores_rf2 <- interaction_scores_rf[!is.na(interaction_scores_rf$z_lm_l), ]
ggplot(df_na_rm, aes(z_lm_l, z_lm_k)) +
geom_point(shape = 3, color = "gray70") +
geom_point(data = interaction_scores_rf2, aes(z_lm_l, z_lm_k), color = "cyan") +
ggtitle("Interaction L vs. Interaction K") +
xlab("z-score L") +
ylab("z-score K") +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_1$r.squared, 3))) +
theme_publication_legend_right() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(size = 16),
axis.title.x = element_text(size = 18),
axis.text.y = element_text(size = 16),
axis.title.y = element_text(size = 18)
)
ggplot(df_na_rm, aes(z_lm_l, z_lm_r)) +
geom_point(shape = 3, color = "gray70") +
geom_point(data = interaction_scores_rf2, aes(z_lm_l, z_lm_r), color = "cyan") +
ggtitle("Interaction L vs. Interaction r") +
xlab("z-score L") +
ylab("z-score r") +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_2$r.squared, 3))) +
theme_publication_legend_right() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(size = 16),
axis.title.x = element_text(size = 18),
axis.text.y = element_text(size = 16),
axis.title.y = element_text(size = 18)
)
ggplot(df_na_rm, aes(z_lm_l, z_lm_auc)) +
geom_point(shape = 3, color = "gray70") +
geom_point(data = interaction_scores_rf2, aes(z_lm_l, z_lm_auc), color = "cyan") +
ggtitle("Interaction L vs. Interaction AUC") +
xlab("z-score L") +
ylab("z-score AUC") +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_3$r.squared, 3))) +
theme_publication_legend_right() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(size = 16),
axis.title.x = element_text(size = 18),
axis.text.y = element_text(size = 16),
axis.title.y = element_text(size = 18)
)
ggplot(df_na_rm, aes(z_lm_k, z_lm_r)) +
geom_point(shape = 3, color = "gray70") +
geom_point(data = interaction_scores_rf2, aes(z_lm_k, z_lm_r), color = "cyan") +
ggtitle("Interaction K vs. Interaction r") +
xlab("z-score K") +
ylab("z-score r") +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_4$r.squared, 3))) +
theme_publication_legend_right() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(size = 16),
axis.title.x = element_text(size = 18),
axis.text.y = element_text(size = 16),
axis.title.y = element_text(size = 18)
)
ggplot(df_na_rm, aes(z_lm_k, z_lm_auc)) +
geom_point(shape = 3, color = "gray70") +
geom_point(data = interaction_scores_rf2, aes(z_lm_k, z_lm_auc), color = "cyan") +
ggtitle("Interaction K vs. Interaction AUC") +
xlab("z-score K") +
ylab("z-score AUC") +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_5$r.squared, 3))) +
theme_publication_legend_right() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(size = 16),
axis.title.x = element_text(size = 18),
axis.text.y = element_text(size = 16),
axis.title.y = element_text(size = 18)
)
ggplot(df_na_rm, aes(z_lm_r, z_lm_auc)) +
geom_point(shape = 3, color = "gray70") +
geom_point(data = interaction_scores_rf2, aes(z_lm_r, z_lm_auc), color = "cyan") +
ggtitle("Interaction r vs. Interaction AUC") +
xlab("z-score r") +
ylab("z-score AUC") +
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_6$r.squared, 3))) +
theme_publication_legend_right() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(size = 16),
axis.title.x = element_text(size = 18),
axis.text.y = element_text(size = 16),
axis.title.y = element_text(size = 18)
)
dev.off()
# write.csv(Labels, file.path("../Code/Parameters.csv"), row.names = FALSE)
timestamp()