#!/usr/bin/env Rscript # NOTE: The script now has 6 optional arguments: # 1. Path to input easy results file # 2. /output/ directory # 3. Path to StudyInfo.csv # 4. Path to sgd_gene_list # 5. The experiment number (Exp# directory) # 6. Standard deviation value library("ggplot2") library("plyr") library("extrafont") library("gridExtra") library("gplots") library("RColorBrewer") library("stringr") library("gdata") library("plotly") library("htmlwidgets") # Parse arguments args <- commandArgs(TRUE) exp_number <- as.numeric(args[1]) delta_bg_factor <- as.numeric(args[2]) study_info_file <- file.path(args[3]) sgd_gene_list <- file.path(args[4]) input_file <- file.path(args[5]) out_dir <- file.path(args[6]) sprintf("The Standard Deviation value is: %f", delta_bg_factor) out_dir_qc <- file.path(out_dir, "qc") if (!dir.exists(out_dir)) { dir.create(out_dir) } if (!dir.exists(out_dir_qc)) { dir.create(out_dir_qc) } options(width = 1000) ls.str() # Write delBGFactor to the StudyInfo file # TODO we probably shouldn't be doing this, need one source of truth # TODO disabling this for now # labels <- read.csv(file = study_info_file, stringsAsFactors = FALSE) # sep = "," # labels[exp_number, 3] <- delta_bg_factor # write.csv(Labels, file = study_info_file, row.names = FALSE) # Begin User Data Selection Section # Read in the data df <- read.delim(input_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE) df <- df[!(df[[1]] %in% c("", "Scan")), ] # df <- df[!(df[[1]]%in%c(61:76)), ] # remove dAmp plates which are Scans 61 thru 76 # df <- df[which(df$Specifics == "WT"), ] # df_length <- length(df[1, ]) # df_end <- length(df[1, ]) - 2 # df <- df[, c(1:42, df_end:df_length)] # print(names(df)) # Use numeric data to perform operations df$col <- as.numeric(df$Col) df$row <- as.numeric(df$Row) df$l <- as.numeric(df$l) df$k <- as.numeric(df$K) df$r <- as.numeric(df$r) df$scan <- as.numeric(df$Scan) df$auc <- as.numeric(df$AUC) df$last_bg <- as.numeric(df$LstBackgrd) df$first_bg <- as.numeric(df$X1stBackgrd) # print(df) # Sometimes the non-varying drug is in the 'Drug' col vs the 'Modifier1' col # as was the case in Gemcitabin and Cytarabin experiments. # The following allows user to rename columns so as to get the appropriate # data where it needs to be for the script to run properly. # colnames(df)[7] <- "Modifier1" # colnames(df)[8] <- "Conc1" # colnames(df)[10] <- "Drug" # colnames(df)[11] <- "Conc" # Set the OrfRep to YDL227C for the ref data df[df$ORF == "YDL227C", ]$OrfRep <- "YDL227C" # Sean removes the Doxycyclin at 0.0ug.mL so that only the Oligomycin series with Doxycyclin of 0.12ug/mL are used. # That is the first DM plates are removed from the data set with the following. # This removes data with dox == 0 leaving gene expression on with four different concentrations of Gemcytabin # df <- df[df$Conc1 != "0ug/ml", ] df <- df[df$Drug != "BMH21", ] # this removes data concerning BMH21 for this experiment # Mert placed the "bad_spot" text in the ORF col. for particular spots in the RF1 and RF2 plates. # This code removes those spots from the data set used for the interaction analysis. # Dr.Hartman feels that these donot effect Zscores significantly and so "non-curated" files were used. # try(df <- df[df$ORF != "bad_spot", ]) # Get total number of drug concentrations total_conc_nums <- length(unique(df$Conc)) # Function to ID numbers in string with characters+numbers (ie to get numeric drug conc) numextract <- function(string) { str_extract(string, "\\-*\\d+\\.*\\d*") } # Generate a new column with the numeric drug concs df$conc_num <- as.numeric(numextract(df$Conc)) # Generate new column with the numeric drug concs as factors starting at 0 for the graphing later df$conc_num_factor <- as.numeric(as.factor(df$conc_num)) - 1 # Get the max factor for concentration max_conc <- max(df$conc_num_factor) # If treating numbers not as factors uncomment next line and comment out previous line # max_conc <- max(df$conc_num) # Remove wells with problems for making graphs and to not include in summary statistics df <- df[df$Gene != "BLANK", ] df <- df[df$Gene != "Blank", ] df <- df[df$ORF != "Blank", ] df <- df[df$Gene != "blank", ] # df <- df[df$Gene != "HO", ] # Use sgd_gene_list to update orfs and replace empty geneName cells with ORF name (adapted from Sean's Merge script). # This is to 'fix' the naming for everything that follows (REMc, Heatmaps ... et.al) rather than do it piece meal later # Sean's Match Script( which was adapted here) was fixed 2022_0608 so as not to overwrite the RF1&RF2 geneNames # in the z_lm_l, k, r&auc output values. Values correlated well but were off by a multiplier factor. genes <- data.frame(read.delim( file = sgd_gene_list, quote = "", header = FALSE, colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11)))) for (i in 1:length(df[, 14])) { ii <- as.numeric(i) line_num <- match(df[ii, 14], genes[, 1], nomatch = 1) orf_rep_col_num <- as.numeric(match("OrfRep", names(df))) if (df[ii, orf_rep_col_num] != "YDL227C") { df[ii, 15] <- genes[line_num, 2] } if ((df[ii, 15] == "") || (df[ii, 15] == "OCT1")) { df[ii, 15] <- df[ii, orf_rep_col_num] } } # Remove dAmPs # jlh confirmed to leave dAmps in so comment out this section # DAmPs_list <- "../Code/22_0602_Remy_DAmPsList.txt" # Damps <- read.delim(DAmPs_list, header = F) # df <- df[!(df$ORF %in% Damps$V1), ] # fix this to Damps[, 1] # dfafterDampsRM = df # backup for debugging # Begin Graphics Boiler Plate Section # theme elements for plots theme_publication <- function(base_size = 14, base_family = "sans") { library(grid) library(ggthemes) (theme_foundation(base_size = base_size, base_family = base_family) + theme(plot.title = element_text( face = "bold", size = rel(1.2), hjust = 0.5), text = element_text(), panel.background = element_rect(colour = NA), plot.background = element_rect(colour = NA), panel.border = element_rect(colour = NA), axis.title = element_text(face = "bold", size = rel(1)), axis.title.y = element_text(angle = 90, vjust = 2), axis.title.x = element_text(vjust = -0.2), axis.text = element_text(), axis.line = element_line(colour = "black"), axis.ticks = element_line(), panel.grid.major = element_line(colour = "#f0f0f0"), panel.grid.minor = element_blank(), legend.key = element_rect(colour = NA), legend.position = "bottom", legend.direction = "horizontal", legend.key.size = unit(0.2, "cm"), legend.spacing = unit(0, "cm"), legend.title = element_text(face = "italic"), plot.margin = unit(c(10, 5, 5, 5), "mm"), strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"), strip.text = element_text(face = "bold") ) ) } scale_fill_publication <- function(...) { library(scales) discrete_scale("fill", "Publication", manual_pal( values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...) } scale_colour_publication <- function(...) { discrete_scale("colour", "Publication", manual_pal( values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...) } theme_publication_legend_right <- function(base_size = 14, base_family = "sans") { (theme_foundation(base_size = base_size, base_family = base_family) + theme(plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5), text = element_text(), panel.background = element_rect(colour = NA), plot.background = element_rect(colour = NA), panel.border = element_rect(colour = NA), axis.title = element_text(face = "bold", size = rel(1)), axis.title.y = element_text(angle = 90, vjust = 2), axis.title.x = element_text(vjust = -0.2), axis.text = element_text(), axis.line = element_line(colour = "black"), axis.ticks = element_line(), panel.grid.major = element_line(colour = "#f0f0f0"), panel.grid.minor = element_blank(), legend.key = element_rect(colour = NA), legend.position = "right", legend.direction = "vertical", legend.key.size = unit(0.5, "cm"), legend.spacing = unit(0, "cm"), legend.title = element_text(face = "italic"), plot.margin = unit(c(10, 5, 5, 5), "mm"), strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"), strip.text = element_text(face = "bold") ) ) } scale_fill_publication <- function(...) { discrete_scale("fill", "Publication", manual_pal( values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...) } scale_colour_publication <- function(...) { discrete_scale("colour", "Publication", manual_pal( values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...) } # Print timestamp for initial time the code starts timestamp() # Begin QC Section # Part 2 - Quality control # Print quality control graphs for each dataset before removing contaminated data # and before adjusting missing data to max theoretical values # Plate analysis plot # Plate analysis is a quality check to identify plate effects containing anomalies plate_analysis_l <- ggplot(df, aes(Scan, l, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary(fun = mean, geom = "point", size = 0.6) + ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_publication() plate_analysis_k <- ggplot(df, aes(Scan, k, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary(fun = mean, geom = "point", size = 0.6) + ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_publication() plate_analysis_r <- ggplot(df, aes(Scan, r, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary(fun = mean, geom = "point", size = 0.6) + ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_publication() plate_analysis_auc <- ggplot(df, aes(Scan, auc, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary(fun = mean, geom = "point", size = 0.6) + ggtitle("Plate analysis by Drug Conc for auc before quality control") + theme_publication() plate_analysis_l_box <- ggplot(df, aes(as.factor(Scan), l, color = as.factor(conc_num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_publication() plate_analysis_k_box <- ggplot(df, aes(as.factor(Scan), k, color = as.factor(conc_num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_publication() plate_analysis_r_box <- ggplot(df, aes(as.factor(Scan), r, color = as.factor(conc_num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_publication() plate_analysis_auc_box <- ggplot(df, aes(as.factor(Scan), auc, color = as.factor(conc_num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for auc before quality control") + theme_publication() # Quality control - values with a high delta background likely have heavy contamination # Check the frequency of these values # Report the L and k values of these spots # Report the number to be removed based on the delta_background_tolerance df$delta_bg <- df$last_bg - df$first_bg # Raw l vs k before QC raw_l_vs_k_before_qc <- ggplot(df, aes(l, k, color = as.factor(conc_num))) + geom_point(aes(ORF = ORF, Gene = Gene, delta_bg = delta_bg), shape = 3) + ggtitle("Raw L vs K before QC") + theme_publication_legend_right() pdf(file.path(out_dir_qc, "raw_l_vs_k_before_qc.pdf"), width = 12, height = 8) raw_l_vs_k_before_qc dev.off() pgg <- ggplotly(raw_l_vs_k_before_qc) plotly_path <- file.path(out_dir_qc, "raw_l_vs_k_before_qc.html") saveWidget(pgg, file = plotly_path, selfcontained = TRUE) # Set delta background tolerance based on 3 sds from the mean delta background delta_background_tolerance <- mean(df$delta_bg) + (delta_bg_factor * sd(df$delta_bg)) # delta_background_tolerance <- mean(df$delta_bg)+(3*sd(df$delta_bg)) sprintf("delta_background_tolerance is %f", delta_background_tolerance) plate_analysis_delta_bg <- ggplot(df, aes(Scan, delta_bg, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2, position = "jitter") + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary(fun = mean, geom = "point", size = 0.6) + ggtitle("Plate analysis by Drug Conc for delta_bg before quality control") + theme_publication() plate_analysis_delta_bg_box <- ggplot(df, aes(as.factor(Scan), delta_bg, color = as.factor(conc_num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for delta_bg before quality control") + theme_publication() x_delta_bg_above_tolerance <- df[df$delta_bg >= delta_background_tolerance, ] x_delta_bg_above_tolerance_k_halfmedian <- (median(x_delta_bg_above_tolerance$k, na.rm = TRUE)) / 2 x_delta_bg_above_tolerance_l_halfmedian <- (median(x_delta_bg_above_tolerance$l, na.rm = TRUE)) / 2 x_delta_bg_above_tolerance_to_remove <- dim(x_delta_bg_above_tolerance)[1] x_delta_bg_above_tolerance_l_vs_k <- ggplot(x_delta_bg_above_tolerance, aes(l, k, color = as.factor(conc_num))) + geom_point(aes(ORF = ORF, Gene = Gene, delta_bg = delta_bg), shape = 3) + ggtitle(paste("Raw L vs K for strains above delta background threshold of", delta_background_tolerance, "or above")) + annotate("text", x = x_delta_bg_above_tolerance_l_halfmedian, y = x_delta_bg_above_tolerance_k_halfmedian, label = paste("Strains above delta background tolerance = ", x_delta_bg_above_tolerance_to_remove) ) + theme_publication_legend_right() pdf(file.path(out_dir_qc, "raw_l_vs_k_for_strains_above_delta_background_threshold.pdf"), width = 12, height = 8) x_delta_bg_above_tolerance_l_vs_k dev.off() pgg <- ggplotly(x_delta_bg_above_tolerance_l_vs_k) plotly_path <- file.path(out_dir_qc, "raw_l_vs_k_for_strains_above_delta_background_threshold.html") saveWidget(pgg, file = plotly_path, selfcontained = TRUE) # Frequency plot for all data vs. the delta_background delta_bg_frequency_plot <- ggplot(df, aes(delta_bg, color = as.factor(conc_num))) + geom_density() + ggtitle("Density plot for Delta Background by Conc All Data") + theme_publication_legend_right() # Bar plot for all data vs. the delta_background delta_bg_bar_plot <- ggplot(df, aes(delta_bg, color = as.factor(conc_num))) + geom_bar() + ggtitle("Bar plot for Delta Background by Conc All Data") + theme_publication_legend_right() pdf(file.path(out_dir_qc, "frequency_delta_background.pdf"), width = 12, height = 8) print(delta_bg_frequency_plot) print(delta_bg_bar_plot) dev.off() # Need to identify missing data, and differentiate between this data and removed data # so the removed data can get set to NA and the missing data can get set to max theoretical values # 1 for missing data, 0 for non missing data # Use "NG" for NoGrowth rather than "missing" df$NG <- 0 try(df[df$l == 0 & !is.na(df$l), ]$NG <- 1) # 1 for removed data, 0 non removed data # Use DB to identify number of genes removed due to the DeltaBackground Threshold rather than "Removed" df$DB <- 0 try(df[df$delta_bg >= delta_background_tolerance, ]$DB <- 1) # Replace the CPPs for l, r, auc and k (must be last!) for removed data try(df[df$delta_bg >= delta_background_tolerance, ]$l <- NA) try(df[df$delta_bg >= delta_background_tolerance, ]$r <- NA) try(df[df$delta_bg >= delta_background_tolerance, ]$auc <- NA) try(df[df$delta_bg >= delta_background_tolerance, ]$k <- NA) # QC Plots plate_analysis_l_after_qc <- ggplot(df, aes(Scan, l, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary( fun = mean, geom = "point", size = 0.6 ) + ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_publication() plate_analysis_k_after_qc <- ggplot(df, aes(Scan, k, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary( fun = mean, geom = "point", size = 0.6 ) + ggtitle("Plate analysis by Drug Conc for k after quality control") + theme_publication() plate_analysis_r_after_qc <- ggplot(df, aes(Scan, r, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary( fun = mean, geom = "point", size = 0.6 ) + ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_publication() plate_analysis_auc_after_qc <- ggplot(df, aes(Scan, auc, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary( fun = mean, geom = "point", size = 0.6 ) + ggtitle("Plate analysis by Drug Conc for auc after quality control") + theme_publication() plate_analysis_delta_bg_after_qc <- ggplot(df, aes(Scan, delta_bg, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary( fun = mean, geom = "point", size = 0.6 ) + ggtitle("Plate analysis by Drug Conc for delta_bg after quality control") + theme_publication() plate_analysis_l_box_after_qc <- ggplot(df, aes(as.factor(Scan), l, color = as.factor(conc_num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_publication() plate_analysis_k_box_after_qc <- ggplot(df, aes(as.factor(Scan), k, color = as.factor(conc_num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_publication() plate_analysis_r_box_after_qc <- ggplot(df, aes(as.factor(Scan), r, color = as.factor(conc_num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_publication() plate_analysis_auc_box_after_qc <- ggplot(df, aes(as.factor(Scan), auc, color = as.factor(conc_num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for auc after quality control") + theme_publication() plate_analysis_delta_bg_box_after_qc <- ggplot(df, aes(as.factor(Scan), delta_bg, color = as.factor(conc_num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for delta_bg after quality control") + theme_publication() # Print the plate analysis data before and after QC pdf(file.path(out_dir_qc, "plate_analysis.pdf"), width = 14, height = 9) plate_analysis_l plate_analysis_l_after_qc plate_analysis_k plate_analysis_k_after_qc plate_analysis_r plate_analysis_r_after_qc plate_analysis_auc plate_analysis_auc_after_qc plate_analysis_delta_bg plate_analysis_delta_bg_after_qc dev.off() # Print the plate analysis data before and after QC pdf(file.path(out_dir_qc, "plate_analysis_boxplots.pdf"), width = 18, height = 9) plate_analysis_l_box plate_analysis_l_box_after_qc plate_analysis_k_box plate_analysis_k_box_after_qc plate_analysis_r_box plate_analysis_r_box_after_qc plate_analysis_auc_box plate_analysis_auc_box_after_qc plate_analysis_delta_bg_box plate_analysis_delta_bg_box_after_qc dev.off() # Remove the zero values and print plate analysis x_no_zero <- df[which(df$l > 0), ] plate_analysis_l_after_qc_z <- ggplot(x_no_zero, aes(Scan, l, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary( fun = mean, geom = "point", size = 0.6 ) + ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_publication() plate_analysis_k_after_qc_z <- ggplot(x_no_zero, aes(Scan, k, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary( fun = mean, geom = "point", size = 0.6 ) + ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_publication() plate_analysis_r_after_qc_z <- ggplot(x_no_zero, aes(Scan, r, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary( fun = mean, geom = "point", size = 0.6 ) + ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_publication() plate_analysis_auc_after_qc_z <- ggplot(x_no_zero, aes(Scan, auc, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary( fun = mean, geom = "point", size = 0.6 ) + ggtitle("Plate analysis by Drug Conc for auc after quality control") + theme_publication() plate_analysis_delta_bg_after_qc_z <- ggplot(x_no_zero, aes(Scan, delta_bg, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary( fun = mean, geom = "point", size = 0.6 ) + ggtitle("Plate analysis by Drug Conc for delta_bg after quality control") + theme_publication() plate_analysis_l_box_after_qc_z <- ggplot(x_no_zero, aes(as.factor(Scan), l, color = as.factor(conc_num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_publication() plate_analysis_k_box_after_qc_z <- ggplot(x_no_zero, aes(as.factor(Scan), k, color = as.factor(conc_num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_publication() plate_analysis_r_box_after_qc_z <- ggplot(x_no_zero, aes(as.factor(Scan), r, color = as.factor(conc_num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_publication() plate_analysis_auc_box_after_qc_z <- ggplot(x_no_zero, aes(as.factor(Scan), auc, color = as.factor(conc_num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for auc after quality control") + theme_publication() plate_analysis_delta_bg_box_after_qc_z <- ggplot(x_no_zero, aes(as.factor(Scan), delta_bg, color = as.factor(conc_num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for delta_bg after quality control") + theme_publication() # Print the plate analysis data before and after QC pdf(file.path(out_dir_qc, "plate_analysis_no_zeros.pdf"), width = 14, height = 9) plate_analysis_l_after_qc_z plate_analysis_k_after_qc_z plate_analysis_r_after_qc_z plate_analysis_auc_after_qc_z plate_analysis_delta_bg_after_qc_z dev.off() # Print the plate analysis data before and after QC pdf(file.path(out_dir_qc, "plate_analysis_no_zeros_boxplots.pdf"), width = 18, height = 9) plate_analysis_l_box_after_qc_z plate_analysis_k_box_after_qc_z plate_analysis_r_box_after_qc_z plate_analysis_auc_box_after_qc_z plate_analysis_delta_bg_box_after_qc_z dev.off() # Remove dataset with zeros removed rm(x_no_zero) # df_test_missing_and_removed <- df[df$Removed == 1, ] # Calculate summary statistics for all strains, including both background and the deletions x_stats_all <- ddply( df, c("conc_num", "conc_num_factor"), summarise, N = (length(l)), mean_l = mean(l, na.rm = TRUE), median_l = median(l, na.rm = TRUE), max_l = max(l, na.rm = TRUE), min_l = min(l, na.rm = TRUE), sd_l = sd(l, na.rm = TRUE), se_l = sd_l / sqrt(N - 1), mean_k = mean(k, na.rm = TRUE), median_k = median(k, na.rm = TRUE), max_k = max(k, na.rm = TRUE), min_k = min(k, na.rm = TRUE), sd_k = sd(k, na.rm = TRUE), se_k = sd_k / sqrt(N - 1), mean_r = mean(r, na.rm = TRUE), median_r = median(r, na.rm = TRUE), max_r = max(r, na.rm = TRUE), min_r = min(r, na.rm = TRUE), sd_r = sd(r, na.rm = TRUE), se_r = sd_r / sqrt(N - 1), mean_auc = mean(auc, na.rm = TRUE), median_auc = median(auc, na.rm = TRUE), max_auc = max(auc, na.rm = TRUE), min_auc = min(auc, na.rm = TRUE), sd_auc = sd(auc, na.rm = TRUE), se_auc = sd_auc / sqrt(N - 1) ) # print(x_stats_all_l) write.csv(x_stats_all, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE) # Part 3 - Generate summary statistics and calculate the max theoretical L value # Calculate the Z score at each drug conc for each deletion strain # Get the background strains - can be modified to take another argument but for most screens will just be YDL227C background_strains <- c("YDL227C") # First part of loop will go through for each background strain # In most cases there will only be one YDL227C for (s in background_strains) { x_background <- df[df$OrfRep == s, ] # If there's missing data for the background strains set these values to NA so the 0 values aren't included in summary statistics # we may want to consider in some cases giving the max high value to L depending on the data type if (table(x_background$l)[1] == 0) { x_background[x_background$l == 0, ]$l <- NA x_background[x_background$k == 0, ]$k <- NA x_background[x_background$r == 0, ]$r <- NA x_background[x_background$auc == 0, ]$auc <- NA } x_background <- x_background[!is.na(x_background$l), ] # Get summary stats for L, k, R, auc x_stats_by_l <- ddply( x_background, c("OrfRep", "conc_num", "conc_num_factor"), summarise, N = (length(l)), mean = mean(l, na.rm = TRUE), median = median(l, na.rm = TRUE), max = max(l, na.rm = TRUE), min = min(l, na.rm = TRUE), sd = sd(l, na.rm = TRUE), se = sd / sqrt(N - 1) ) print(x_stats_by_l) x1_sd <- max(x_stats_by_l$sd) x_stats_by_k <- ddply( x_background, c("OrfRep", "conc_num", "conc_num_factor"), summarise, N = (length(k)), mean = mean(k, na.rm = TRUE), median = median(k, na.rm = TRUE), max = max(k, na.rm = TRUE), min = min(k, na.rm = TRUE), sd = sd(k, na.rm = TRUE), se = sd / sqrt(N - 1) ) x1_sd_k <- max(x_stats_by_k$sd) x_stats_by_r <- ddply( x_background, c("OrfRep", "conc_num", "conc_num_factor"), summarise, N = length(r), mean = mean(r, na.rm = TRUE), median = median(r, na.rm = TRUE), max = max(r, na.rm = TRUE), min = min(r, na.rm = TRUE), sd = sd(r, na.rm = TRUE), se = sd / sqrt(N - 1) ) x1_sd_r <- max(x_stats_by_r$sd) x_stats_by_auc <- ddply( x_background, c("OrfRep", "conc_num", "conc_num_factor"), summarise, N = length(auc), mean = mean(auc, na.rm = TRUE), median = median(auc, na.rm = TRUE), max = max(auc, na.rm = TRUE), min = min(auc, na.rm = TRUE), sd = sd(auc, na.rm = TRUE), se = sd / sqrt(N - 1) ) x1_sd_auc <- max(x_stats_by_auc$sd) x_stats_by <- ddply( x_background, c("OrfRep", "conc_num", "conc_num_factor"), summarise, N = (length(l)), mean_l = mean(l, na.rm = TRUE), median_l = median(l, na.rm = TRUE), max_l = max(l, na.rm = TRUE), min_l = min(l, na.rm = TRUE), sd_l = sd(l, na.rm = TRUE), se_l = sd_l / sqrt(N - 1), mean_k = mean(k, na.rm = TRUE), median_k = median(k, na.rm = TRUE), max_k = max(k, na.rm = TRUE), min_k = min(k, na.rm = TRUE), sd_k = sd(k, na.rm = TRUE), se_k = sd_k / sqrt(N - 1), mean_r = mean(r, na.rm = TRUE), median_r = median(r, na.rm = TRUE), max_r = max(r, na.rm = TRUE), min_r = min(r, na.rm = TRUE), sd_r = sd(r, na.rm = TRUE), se_r = sd_r / sqrt(N - 1), mean_auc = mean(auc, na.rm = TRUE), median_auc = median(auc, na.rm = TRUE), max_auc = max(auc, na.rm = TRUE), min_l = min(auc, na.rm = TRUE), sd_auc = sd(auc, na.rm = TRUE), se_auc = sd_auc / sqrt(N - 1) ) write.csv(x_stats_by, file.path(out_dir, "summary_stats_background_strains.csv"), row.names = FALSE) # Calculate the max theoretical L values # Only look for max values when k is within 2sd of the ref strain for (q in unique(df$conc_num_factor)) { if (q == 0) { x_within_2sd_k <- df[df$conc_num_factor == q, ] x_within_2sd_k <- x_within_2sd_k[!is.na(x_within_2sd_k$l), ] x_stats_temp_k <- x_stats_by_k[x_stats_by_k$conc_num_factor == q, ] x_within_2sd_k <- x_within_2sd_k[x_within_2sd_k$k >= (x_stats_temp_k$mean[1] - (2 * x_stats_temp_k$sd[1])), ] x_within_2sd_k <- x_within_2sd_k[x_within_2sd_k$k <= (x_stats_temp_k$mean[1] + (2 * x_stats_temp_k$sd[1])), ] x_outside_2sd_k <- df[df$conc_num_factor == q, ] x_outside_2sd_k <- x_outside_2sd_k[!is.na(x_outside_2sd_k$l), ] # x_outside_2sd_k_temp <- # x_stats_by_k[x_stats_by_k$conc_num_factor == q, ] x_outside_2sd_k <- x_outside_2sd_k[ x_outside_2sd_k$k <= (x_stats_temp_k$mean[1] - (2 * x_stats_temp_k$sd[1])) | x_outside_2sd_k$k >= (x_stats_temp_k$mean[1] + (2 * x_stats_temp_k$sd[1])), ] # x_outside_2sd_k <- # x_outside_2sd_k[x_outside_2sd_k$k >= (x_stats_temp_k$mean[1] + (2*x_stats_temp_k$sd[1])), ] } if (q > 0) { x_within_2sd_k_temp <- df[df$conc_num_factor == q, ] x_within_2sd_k_temp <- x_within_2sd_k_temp[!is.na(x_within_2sd_k_temp$l), ] x_stats_temp_k <- x_stats_by_k[x_stats_by_k$conc_num_factor == q, ] x_within_2sd_k_temp <- x_within_2sd_k_temp[x_within_2sd_k_temp$k >= (x_stats_temp_k$mean[1] - (2 * x_stats_temp_k$sd[1])), ] x_within_2sd_k_temp <- x_within_2sd_k_temp[x_within_2sd_k_temp$k <= (x_stats_temp_k$mean[1] + (2 * x_stats_temp_k$sd[1])), ] x_within_2sd_k <- rbind(x_within_2sd_k, x_within_2sd_k_temp) x_outside_2sd_k_temp <- df[df$conc_num_factor == q, ] x_outside_2sd_k_temp <- x_outside_2sd_k_temp[!is.na(x_outside_2sd_k_temp$l), ] # x_outside_2sd_k_temp <- # x_stats_by_k[x_stats_by_k$conc_num_factor == q, ] x_outside_2sd_k_temp <- x_outside_2sd_k_temp[ x_outside_2sd_k_temp$k <= (x_stats_temp_k$mean[1] - (2 * x_stats_temp_k$sd[1])) | x_outside_2sd_k_temp$k >= (x_stats_temp_k$mean[1] + (2 * x_stats_temp_k$sd[1])), ] # x_outside_2sd_k_temp <- # x_outside_2sd_k_temp[x_outside_2sd_k_temp$k >= (x_stats_temp_k$mean[1] + (2*x_stats_temp_k$sd[1])) , ] x_outside_2sd_k <- rbind(x_outside_2sd_k, x_outside_2sd_k_temp) } } x_stats_by_l_within_2sd_k <- ddply( x_within_2sd_k, c("conc_num", "conc_num_factor"), summarise, N = (length(l)), mean = mean(l), median = median(l), max = max(l, na.rm = TRUE), min = min(l, na.rm = TRUE), sd = sd(l), se = sd / sqrt(N - 1), z_max = (max - mean) / sd ) print(x_stats_by_l_within_2sd_k) x1_sd_within_2sd_k <- max(x_stats_by_l_within_2sd_k$sd) write.csv( x_stats_by_l_within_2sd_k, file.path(out_dir_qc, "max_observed_l_vals_for_spots_within_2sd_k.csv"), row.names = FALSE ) x_stats_by_l_outside_2sd_k <- ddply( x_outside_2sd_k, c("conc_num", "conc_num_factor"), summarise, N = (length(l)), mean = mean(l), median = median(l), max = max(l, na.rm = TRUE), min = min(l, na.rm = TRUE), sd = sd(l), se = sd / sqrt(N - 1) ) print(x_stats_by_l_outside_2sd_k) x1_sd_outside_2sd_k <- max(x_stats_by_l_outside_2sd_k$sd) # x1_sd_outside_2sd_k <- df[df$l %in% x1_sd_within_2sd_k$l, ] outside_2sd_k_l_vs_k <- ggplot(x_outside_2sd_k, aes(l, k, color = as.factor(conc_num))) + geom_point(aes(ORF = ORF, Gene = Gene, delta_bg = delta_bg), shape = 3) + ggtitle("Raw L vs K for strains falling outside 2sd of the K mean at each conc") + theme_publication_legend_right() pdf(file.path(out_dir_qc, "raw_l_vs_k_for_strains_2sd_outside_mean_k.pdf"), width = 10, height = 8) print(outside_2sd_k_l_vs_k) dev.off() pgg <- ggplotly(outside_2sd_k_l_vs_k) plotly_path <- file.path(out_dir_qc, "raw_l_vs_k_for_strains_outside_2sd_k.html") saveWidget(pgg, file = plotly_path, selfcontained = TRUE) outside_2sd_k_delta_background_vs_k <- ggplot(x_outside_2sd_k, aes(delta_bg, k, color = as.factor(conc_num))) + geom_point(aes(l = l, ORF = ORF, Gene = Gene), shape = 3, position = "jitter") + ggtitle("DeltaBackground vs K for strains falling outside 2sd of the K mean at each conc") + theme_publication_legend_right() pdf(file.path(out_dir_qc, "delta_background_vs_k_for_strains_2sd_outside_mean_k.pdf"), width = 10, height = 8) outside_2sd_k_delta_background_vs_k dev.off() pgg <- ggplotly(outside_2sd_k_delta_background_vs_k) # pgg plotly_path <- file.path(out_dir_qc, "delta_background_vs_k_for_strains_outside_2sd_k.html") saveWidget(pgg, file = plotly_path, selfcontained = TRUE) # Get the background strain mean values at the no drug conc to calculate shift background_l <- x_stats_by_l$mean[1] background_k <- x_stats_by_k$mean[1] background_r <- x_stats_by_r$mean[1] background_auc <- x_stats_by_auc$mean[1] # Create empty plots for plotting element p_l <- ggplot() p_k <- ggplot() p_r <- ggplot() p_auc <- ggplot() p_rf_l <- ggplot() p_rf_k <- ggplot() p_rf_r <- ggplot() p_rf_auc <- ggplot() # Get only the deletion strains df2 <- df df2 <- df2[df2$OrfRep != "YDL227C", ] # If set to max theoretical value, add a 1 to SM, if not, leave as 0 # SM = Set to Max df2$SM <- 0 # Set the missing values to the highest theoretical value at each drug conc for L # Leave other values as 0 for the max/min for (i in 1:length(unique(df2$conc_num))) { concentration <- unique(df2$conc_num)[i] df2_temp <- df2[df2$conc_num == concentration, ] if (concentration == 0) { df2_new <- df2_temp sprintf("Check loop order, conc = %f", concentration) } if (concentration > 0) { try(df2_temp[df2_temp$l == 0 & !is.na(df2_temp$l), ]$l <- x_stats_by_l_within_2sd_k$max[i]) try(df2_temp[df2_temp$l >= x_stats_by_l_within_2sd_k$max[i] & !is.na(df2_temp$l), ]$SM <- 1) try(df2_temp[df2_temp$l >= x_stats_by_l_within_2sd_k$max[i] & !is.na(df2_temp$l), ]$l <- x_stats_by_l_within_2sd_k$max[i]) # df2_temp[df2_temp$k == 0, ]$k <- x_stats_all_k$max[i] # df2_temp[df2_temp$r == 0, ]$r <- x_stats_all_r$max[i] # df2_temp[df2_temp$auc == 0, ]$auc <- x_stats_all_auc$max[i] sprintf("Check loop order, conc = %f", concentration) df2_new <- rbind(df2_new, df2_temp) } } df2 <- df2_new # Get only the RF strains df2_rf <- df df2_rf <- df2_rf[df2_rf$OrfRep == "YDL227C", ] # If set to max theoretical value, add a 1 to SM, if not, leave as 0 # SM = Set to Max df2_rf$SM <- 0 # Set the missing values to the highest theoretical value at each drug conc for L # Leave other values as 0 for the max/min for (i in 1:length(unique(df2_rf$conc_num))) { concentration <- unique(df2_rf$conc_num)[i] df2_rf_temp <- df2_rf[df2_rf$conc_num == concentration, ] if (concentration == 0) { df2_rf_new <- df2_rf_temp sprintf("Check loop order, conc = %f", concentration) } if (concentration > 0) { try(df2_rf_temp[df2_rf_temp$l == 0 & !is.na(df2_rf_temp$l), ]$l <- x_stats_by_l_within_2sd_k$max[i]) try(df2_temp[df2_temp$l >= x_stats_by_l_within_2sd_k$max[i] & !is.na(df2_temp$l), ]$SM <- 1) try(df2_rf_temp[df2_rf_temp$l >= x_stats_by_l_within_2sd_k$max[i] & !is.na(df2_rf_temp$l), ]$l <- x_stats_by_l_within_2sd_k$max[i]) sprintf("If error, refs have no L values outside theoretical max L, for REFs, conc = %f", concentration) df2_rf_new <- rbind(df2_rf_new, df2_rf_temp) } } df2_rf <- df2_rf_new # Part 4 Get the RF Z score values # Change the OrfRep Column to include the RF strain, the Gene name and the Num. so each RF gets its own score df2_rf$OrfRep <- paste(df2_rf$OrfRep, df2_rf$Gene, df2_rf$Num., sep = "_") num_genes_rf <- length(unique(df2_rf$OrfRep)) # print(num_genes_rf) # Create the output data.frame containing columns for each RF strain interaction_scores_rf <- unique(df2_rf["OrfRep"]) # interaction_scores_rf$Gene <- unique(df2$Gene) interaction_scores_rf$Gene <- NA interaction_scores_rf$Raw_Shift_l <- NA interaction_scores_rf$z_shift_l <- NA interaction_scores_rf$lm_Score_l <- NA interaction_scores_rf$z_lm_l <- NA interaction_scores_rf$R_Squared_l <- NA interaction_scores_rf$Sum_z_Score_l <- NA interaction_scores_rf$avg_zscore_l <- NA interaction_scores_rf$Raw_Shift_k <- NA interaction_scores_rf$z_shift_k <- NA interaction_scores_rf$lm_Score_k <- NA interaction_scores_rf$z_lm_k <- NA interaction_scores_rf$R_Squared_k <- NA interaction_scores_rf$Sum_z_Score_k <- NA interaction_scores_rf$avg_zscore_k <- NA interaction_scores_rf$Raw_Shift_r <- NA interaction_scores_rf$z_shift_r <- NA interaction_scores_rf$lm_Score_r <- NA interaction_scores_rf$z_lm_r <- NA interaction_scores_rf$R_Squared_r <- NA interaction_scores_rf$Sum_z_Score_r <- NA interaction_scores_rf$avg_zscore_r <- NA interaction_scores_rf$Raw_Shift_auc <- NA interaction_scores_rf$z_shift_auc <- NA interaction_scores_rf$lm_Score_auc <- NA interaction_scores_rf$z_lm_auc <- NA interaction_scores_rf$R_Squared_auc <- NA interaction_scores_rf$Sum_z_Score_auc <- NA interaction_scores_rf$avg_zscore_auc <- NA interaction_scores_rf$NG <- NA interaction_scores_rf$SM <- NA for (i in 1:num_genes_rf) { # Get each deletion strain ORF gene_sel <- unique(df2_rf$OrfRep)[i] # Extract only the current deletion strain and its data x_gene_sel <- df2_rf[df2_rf$OrfRep == gene_sel, ] x_stats_interaction <- ddply( x_gene_sel, c("OrfRep", "Gene", "conc_num", "conc_num_factor"), summarise, N = (length(l)), mean_l = mean(l, na.rm = TRUE), median_l = median(l, na.rm = TRUE), sd_l = sd(l, na.rm = TRUE), se_l = sd_l / sqrt(N - 1), mean_k = mean(k, na.rm = TRUE), median_k = median(k, na.rm = TRUE), sd_k = sd(k, na.rm = TRUE), se_k = sd_k / sqrt(N - 1), mean_r = mean(r, na.rm = TRUE), median_r = median(r, na.rm = TRUE), sd_r = sd(r, na.rm = TRUE), se_r = sd_r / sqrt(N - 1), mean_auc = mean(auc, na.rm = TRUE), median_auc = median(auc, na.rm = TRUE), sd_auc = sd(auc, na.rm = TRUE), se_auc = sd_auc / sqrt(N - 1), NG = sum(NG, na.rm = TRUE), DB = sum(DB, na.rm = TRUE), SM = sum(SM, na.rm = TRUE) ) # Get shift vals # if L is 0, that means the no growth on no drug # if L is NA at 0, that means the spot was removed due to contamination # if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift # otherwise calculate shift at no drug conc if (is.na(x_stats_interaction$mean_l[1]) || x_stats_interaction$mean_l[1] == 0) { x_stats_interaction$Raw_Shift_l <- 0 x_stats_interaction$Raw_Shift_k <- 0 x_stats_interaction$Raw_Shift_r <- 0 x_stats_interaction$Raw_Shift_auc <- 0 x_stats_interaction$z_shift_l <- 0 x_stats_interaction$z_shift_k <- 0 x_stats_interaction$z_shift_r <- 0 x_stats_interaction$z_shift_auc <- 0 } else { x_stats_interaction$Raw_Shift_l <- x_stats_interaction$mean_l[1] - background_l x_stats_interaction$Raw_Shift_k <- x_stats_interaction$mean_k[1] - background_k x_stats_interaction$Raw_Shift_r <- x_stats_interaction$mean_r[1] - background_r x_stats_interaction$Raw_Shift_auc <- x_stats_interaction$mean_auc[1] - background_auc x_stats_interaction$z_shift_l <- x_stats_interaction$Raw_Shift_l[1] / x_stats_by_l$sd[1] x_stats_interaction$z_shift_k <- x_stats_interaction$Raw_Shift_k[1] / x_stats_by_k$sd[1] x_stats_interaction$z_shift_r <- x_stats_interaction$Raw_Shift_r[1] / x_stats_by_r$sd[1] x_stats_interaction$z_shift_auc <- x_stats_interaction$Raw_Shift_auc[1] / x_stats_by_auc$sd[1] } # Get WT vals x_stats_interaction$WT_l <- x_stats_by_l$mean x_stats_interaction$WT_k <- x_stats_by_k$mean x_stats_interaction$WT_r <- x_stats_by_r$mean x_stats_interaction$WT_auc <- x_stats_by_auc$mean # Get WT SD x_stats_interaction$WT_sd_l <- x_stats_by_l$sd x_stats_interaction$WT_sd_k <- x_stats_by_k$sd x_stats_interaction$WT_sd_r <- x_stats_by_r$sd x_stats_interaction$WT_sd_auc <- x_stats_by_auc$sd # Only get scores if there's growth at no drug if (x_stats_interaction$mean_l[1] != 0 && !is.na(x_stats_interaction$mean_l[1])) { # Calculate expected values x_stats_interaction$Exp_l <- x_stats_interaction$WT_l + x_stats_interaction$Raw_Shift_l x_stats_interaction$Exp_k <- x_stats_interaction$WT_k + x_stats_interaction$Raw_Shift_k x_stats_interaction$Exp_r <- x_stats_interaction$WT_r + x_stats_interaction$Raw_Shift_r x_stats_interaction$Exp_auc <- x_stats_interaction$WT_auc + x_stats_interaction$Raw_Shift_auc # Calculate normalized delta values x_stats_interaction$delta_l <- x_stats_interaction$mean_l - x_stats_interaction$Exp_l x_stats_interaction$delta_k <- x_stats_interaction$mean_k - x_stats_interaction$Exp_k x_stats_interaction$delta_r <- x_stats_interaction$mean_r - x_stats_interaction$Exp_r x_stats_interaction$delta_auc <- x_stats_interaction$mean_auc - x_stats_interaction$Exp_auc # Disregard shift for no growth values in Z score calculation if (sum(x_stats_interaction$NG, na.rm = TRUE) > 0) { x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_l <- x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_l x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_k <- x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_k x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_r <- x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_r x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_auc <- x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_auc } # Disregard shift for set to max values in Z score calculation if (sum(x_stats_interaction$SM, na.rm = TRUE) > 0) { x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_l <- x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_l # Only calculate the L value without shift since L is the only adjusted value # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_k <- # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_k # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_r <- # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_r # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_auc <- # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_AU } # Calculate Z score at each concentration x_stats_interaction$zscore_l <- (x_stats_interaction$delta_l) / (x_stats_interaction$WT_sd_l) x_stats_interaction$zscore_k <- (x_stats_interaction$delta_k) / (x_stats_interaction$WT_sd_k) x_stats_interaction$zscore_r <- (x_stats_interaction$delta_r) / (x_stats_interaction$WT_sd_r) x_stats_interaction$zscore_auc <- (x_stats_interaction$delta_auc) / (x_stats_interaction$WT_sd_auc) # Get linear model gene_lm_l <- lm(formula = delta_l ~ conc_num_factor, data = x_stats_interaction) gene_lm_k <- lm(formula = delta_k ~ conc_num_factor, data = x_stats_interaction) gene_lm_r <- lm(formula = delta_r ~ conc_num_factor, data = x_stats_interaction) gene_lm_auc <- lm(formula = delta_auc ~ conc_num_factor, data = x_stats_interaction) # Get interaction score calculated by linear model and R-squared value for the fit gene_interaction_l <- max_conc * (gene_lm_l$coefficients[2]) + gene_lm_l$coefficients[1] r_squared_l <- summary(gene_lm_l)$r.squared gene_interaction_k <- max_conc * (gene_lm_k$coefficients[2]) + gene_lm_k$coefficients[1] r_squared_k <- summary(gene_lm_k)$r.squared gene_interaction_r <- max_conc * (gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] r_squared_r <- summary(gene_lm_r)$r.squared gene_interaction_auc <- max_conc * (gene_lm_r$coefficients[2]) + gene_lm_auc$coefficients[1] r_squared_auc <- summary(gene_lm_auc)$r.squared # Get total of non removed values num_non_removed_conc <- total_conc_nums - sum(x_stats_interaction$DB, na.rm = TRUE) - 1 # Report the scores interaction_scores_rf$OrfRep[interaction_scores_rf$OrfRep == gene_sel] <- x_gene_sel$OrfRep[1] interaction_scores_rf$Gene[interaction_scores_rf$OrfRep == gene_sel] <- x_gene_sel$Gene[1] interaction_scores_rf$Raw_Shift_l[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_l[1] interaction_scores_rf$z_shift_l[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$z_shift_l[1] interaction_scores_rf$lm_Score_l[interaction_scores_rf$OrfRep == gene_sel] <- gene_interaction_l interaction_scores_rf$R_Squared_l[interaction_scores_rf$OrfRep == gene_sel] <- r_squared_l interaction_scores_rf$Sum_z_Score_l[interaction_scores_rf$OrfRep == gene_sel] <- sum(x_stats_interaction$zscore_l, na.rm = TRUE) interaction_scores_rf$avg_zscore_l[interaction_scores_rf$OrfRep == gene_sel] <- sum(x_stats_interaction$zscore_l, na.rm = TRUE) / (num_non_removed_conc) interaction_scores_rf$Raw_Shift_k[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_k[1] interaction_scores_rf$z_shift_k[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$z_shift_k[1] interaction_scores_rf$lm_Score_k[interaction_scores_rf$OrfRep == gene_sel] <- gene_interaction_k interaction_scores_rf$R_Squared_k[interaction_scores_rf$OrfRep == gene_sel] <- r_squared_k interaction_scores_rf$Sum_z_Score_k[interaction_scores_rf$OrfRep == gene_sel] <- sum(x_stats_interaction$zscore_k, na.rm = TRUE) interaction_scores_rf$avg_zscore_k[interaction_scores_rf$OrfRep == gene_sel] <- sum(x_stats_interaction$zscore_k, na.rm = TRUE) / (num_non_removed_conc) interaction_scores_rf$Raw_Shift_r[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_r[1] interaction_scores_rf$z_shift_r[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$z_shift_r[1] interaction_scores_rf$lm_Score_r[interaction_scores_rf$OrfRep == gene_sel] <- gene_interaction_r interaction_scores_rf$R_Squared_r[interaction_scores_rf$OrfRep == gene_sel] <- r_squared_r interaction_scores_rf$Sum_z_Score_r[interaction_scores_rf$OrfRep == gene_sel] <- sum(x_stats_interaction$zscore_r, na.rm = TRUE) interaction_scores_rf$avg_zscore_r[interaction_scores_rf$OrfRep == gene_sel] <- sum(x_stats_interaction$zscore_r, na.rm = TRUE) / (total_conc_nums - 1) interaction_scores_rf$Raw_Shift_auc[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_auc[1] interaction_scores_rf$z_shift_auc[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$z_shift_auc[1] interaction_scores_rf$lm_Score_auc[interaction_scores_rf$OrfRep == gene_sel] <- gene_interaction_auc interaction_scores_rf$R_Squared_auc[interaction_scores_rf$OrfRep == gene_sel] <- r_squared_auc interaction_scores_rf$Sum_z_Score_auc[interaction_scores_rf$OrfRep == gene_sel] <- sum(x_stats_interaction$zscore_auc, na.rm = TRUE) interaction_scores_rf$avg_zscore_auc[interaction_scores_rf$OrfRep == gene_sel] <- sum(x_stats_interaction$zscore_auc, na.rm = TRUE) / (total_conc_nums - 1) } if (x_stats_interaction$mean_l[1] == 0 || is.na(x_stats_interaction$mean_l[1])) { # Calculate expected values x_stats_interaction$Exp_l <- x_stats_interaction$WT_l + x_stats_interaction$Raw_Shift_l x_stats_interaction$Exp_k <- x_stats_interaction$WT_k + x_stats_interaction$Raw_Shift_k x_stats_interaction$Exp_r <- x_stats_interaction$WT_r + x_stats_interaction$Raw_Shift_r x_stats_interaction$Exp_auc <- x_stats_interaction$WT_auc + x_stats_interaction$Raw_Shift_auc # Calculate normalized delta values x_stats_interaction$delta_l <- x_stats_interaction$mean_l - x_stats_interaction$Exp_l x_stats_interaction$delta_k <- x_stats_interaction$mean_k - x_stats_interaction$Exp_k x_stats_interaction$delta_r <- x_stats_interaction$mean_r - x_stats_interaction$Exp_r x_stats_interaction$delta_auc <- x_stats_interaction$mean_auc - x_stats_interaction$Exp_auc # Disregard shift for missing values in Z score calculation if (sum(x_stats_interaction$NG, na.rm = TRUE) > 0) { x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_l <- x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_l x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_k <- x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_k x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_r <- x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_r x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_auc <- x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_auc } # Disregard shift for set to max values in Z score calculation if (sum(x_stats_interaction$SM, na.rm = TRUE) > 0) { x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_l <- x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_l # Only calculate the L value without shift since L is the only adjusted value # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_k <- # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_k # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_r <- # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_r # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_auc <- # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_auc } # Calculate Z score at each concentration x_stats_interaction$zscore_l <- (x_stats_interaction$delta_l) / (x_stats_interaction$WT_sd_l) x_stats_interaction$zscore_k <- (x_stats_interaction$delta_k) / (x_stats_interaction$WT_sd_k) x_stats_interaction$zscore_r <- (x_stats_interaction$delta_r) / (x_stats_interaction$WT_sd_r) x_stats_interaction$zscore_auc <- (x_stats_interaction$delta_auc) / (x_stats_interaction$WT_sd_auc) # NA values for the next part since there's an NA or 0 at the no drug. gene_lm_l <- NA gene_lm_k <- NA gene_lm_r <- NA gene_lm_auc <- NA gene_interaction_l <- NA r_squared_l <- NA gene_interaction_k <- NA r_squared_k <- NA gene_interaction_r <- NA r_squared_r <- NA gene_interaction_auc <- NA r_squared_auc <- NA x_stats_interaction$Raw_Shift_l <- NA x_stats_interaction$Raw_Shift_k <- NA x_stats_interaction$Raw_Shift_r <- NA x_stats_interaction$Raw_Shift_auc <- NA x_stats_interaction$z_shift_l <- NA x_stats_interaction$z_shift_k <- NA x_stats_interaction$z_shift_r <- NA x_stats_interaction$z_shift_auc <- NA interaction_scores_rf$OrfRep[interaction_scores_rf$OrfRep == gene_sel] <- x_gene_sel$OrfRep[1] interaction_scores_rf$Gene[interaction_scores_rf$OrfRep == gene_sel] <- x_gene_sel$Gene[1] interaction_scores_rf$Raw_Shift_l[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_l[1] interaction_scores_rf$z_shift_l[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$z_shift_l[1] interaction_scores_rf$lm_Score_l[interaction_scores_rf$OrfRep == gene_sel] <- gene_interaction_l interaction_scores_rf$R_Squared_l[interaction_scores_rf$OrfRep == gene_sel] <- r_squared_l interaction_scores_rf$Sum_z_Score_l[interaction_scores_rf$OrfRep == gene_sel] <- NA interaction_scores_rf$avg_zscore_l[interaction_scores_rf$OrfRep == gene_sel] <- NA interaction_scores_rf$Raw_Shift_k[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_k[1] interaction_scores_rf$z_shift_k[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$z_shift_k[1] interaction_scores_rf$lm_Score_k[interaction_scores_rf$OrfRep == gene_sel] <- gene_interaction_k interaction_scores_rf$R_Squared_k[interaction_scores_rf$OrfRep == gene_sel] <- r_squared_k interaction_scores_rf$Sum_z_Score_k[interaction_scores_rf$OrfRep == gene_sel] <- NA interaction_scores_rf$avg_zscore_k[interaction_scores_rf$OrfRep == gene_sel] <- NA interaction_scores_rf$Raw_Shift_r[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_r[1] interaction_scores_rf$z_shift_r[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$z_shift_r[1] interaction_scores_rf$lm_Score_r[interaction_scores_rf$OrfRep == gene_sel] <- gene_interaction_r interaction_scores_rf$R_Squared_r[interaction_scores_rf$OrfRep == gene_sel] <- r_squared_r interaction_scores_rf$Sum_z_Score_r[interaction_scores_rf$OrfRep == gene_sel] <- NA interaction_scores_rf$avg_zscore_r[interaction_scores_rf$OrfRep == gene_sel] <- NA interaction_scores_rf$Raw_Shift_auc[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_auc[1] interaction_scores_rf$z_shift_auc[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$z_shift_auc[1] interaction_scores_rf$lm_Score_auc[interaction_scores_rf$OrfRep == gene_sel] <- gene_interaction_auc interaction_scores_rf$R_Squared_auc[interaction_scores_rf$OrfRep == gene_sel] <- r_squared_auc interaction_scores_rf$Sum_z_Score_auc[interaction_scores_rf$OrfRep == gene_sel] <- NA interaction_scores_rf$avg_zscore_auc[interaction_scores_rf$OrfRep == gene_sel] <- NA } if (i == 1) { x_stats_interaction_all_rf <- x_stats_interaction } if (i > 1) { x_stats_interaction_all_rf <- rbind(x_stats_interaction_all_rf, x_stats_interaction) } interaction_scores_rf$NG[interaction_scores_rf$OrfRep == gene_sel] <- sum(x_stats_interaction$NG, na.rm = TRUE) interaction_scores_rf$DB[interaction_scores_rf$OrfRep == gene_sel] <- sum(x_stats_interaction$DB, na.rm = TRUE) interaction_scores_rf$SM[interaction_scores_rf$OrfRep == gene_sel] <- sum(x_stats_interaction$SM, na.rm = TRUE) # x_stats_l_int_temp <- rbind(x_stats_l_int_temp, x_stats_l_int) } print("Pass RF Calculation loop") lm_sd_l <- sd(interaction_scores_rf$lm_Score_l, na.rm = TRUE) lm_sd_k <- sd(interaction_scores_rf$lm_Score_k, na.rm = TRUE) lm_sd_r <- sd(interaction_scores_rf$lm_Score_r, na.rm = TRUE) lm_sd_auc <- sd(interaction_scores_rf$lm_Score_auc, na.rm = TRUE) lm_mean_l <- mean(interaction_scores_rf$lm_Score_l, na.rm = TRUE) lm_mean_k <- mean(interaction_scores_rf$lm_Score_k, na.rm = TRUE) lm_mean_r <- mean(interaction_scores_rf$lm_Score_r, na.rm = TRUE) lm_mean_auc <- mean(interaction_scores_rf$lm_Score_auc, na.rm = TRUE) print(paste("Mean RF linear regression score L", lm_mean_l)) interaction_scores_rf$z_lm_l <- (interaction_scores_rf$lm_Score_l - lm_mean_l) / (lm_sd_l) interaction_scores_rf$z_lm_k <- (interaction_scores_rf$lm_Score_k - lm_mean_k) / (lm_sd_k) interaction_scores_rf$z_lm_r <- (interaction_scores_rf$lm_Score_r - lm_mean_r) / (lm_sd_r) interaction_scores_rf$z_lm_auc <- (interaction_scores_rf$lm_Score_auc - lm_mean_auc) / (lm_sd_auc) interaction_scores_rf <- interaction_scores_rf[order(interaction_scores_rf$z_lm_l, decreasing = TRUE), ] interaction_scores_rf <- interaction_scores_rf[order(interaction_scores_rf$NG, decreasing = TRUE), ] write.csv(interaction_scores_rf, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE) for (i in 1:num_genes_rf) { gene_sel <- unique(interaction_scores_rf$OrfRep)[i] x_z_calculations <- x_stats_interaction_all_rf[x_stats_interaction_all_rf$OrfRep == gene_sel, ] df_int_scores <- interaction_scores_rf[interaction_scores_rf$OrfRep == gene_sel, ] p_rf_l[[i]] <- ggplot(x_z_calculations, aes(conc_num_factor, delta_l)) + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + coord_cartesian(ylim = c(-65, 65)) + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_l), ymax = 0 + (2 * WT_sd_l)), alpha = 0.3) + ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) + annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$z_shift_l, 2))) + scale_color_discrete(guide = FALSE) + # annotate("text", x = 1, y = 35, label = paste("Avg Zscore =", round(df_int_scores$avg_zscore_l, 2))) + annotate("text", x = 1, y = 25, label = paste("lm Zscore =", round(df_int_scores$z_lm_l, 2))) + annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) + annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) + annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) + scale_x_continuous( name = unique(df$Drug[1]), breaks = unique(x_z_calculations$conc_num_factor), labels = unique(as.character(x_z_calculations$conc_num)) ) + scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) + theme_publication() p_rf_k[[i]] <- ggplot( x_z_calculations, aes(conc_num_factor, delta_k)) + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + coord_cartesian(ylim = c(-65, 65)) + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_k), ymax = 0 + (2 * WT_sd_k)), alpha = 0.3) + ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) + annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$z_shift_k, 2))) + scale_color_discrete(guide = FALSE) + # annotate("text", x = 1, y = 35, label = paste("Avg ZScore =", round(df_int_scores$avg_zscore_k, 2))) + annotate("text", x = 1, y = 25, label = paste("lm ZScore =", round(df_int_scores$z_lm_l, 2))) + annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) + annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) + annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) + scale_x_continuous( name = unique(df$Drug[1]), breaks = unique(x_z_calculations$conc_num_factor), labels = unique(as.character(x_z_calculations$conc_num)) ) + scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) + theme_publication() p_rf_r[[i]] <- ggplot( x_z_calculations, aes(conc_num_factor, delta_r)) + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + coord_cartesian(ylim = c(-0.65, 0.65)) + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_r), ymax = 0 + (2 * WT_sd_r)), alpha = 0.3) + ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) + annotate("text", x = 1, y = 0.45, label = paste("ZShift =", round(df_int_scores$z_shift_r, 2))) + scale_color_discrete(guide = FALSE) + # annotate("text", x = 1, y = 0.35, label = paste("Avg ZScore =", round(df_int_scores$avg_zscore_r, 2))) + annotate("text", x = 1, y = 0.25, label = paste("lm ZScore =", round(df_int_scores$z_lm_r, 2))) + annotate("text", x = 1, y = -0.25, label = paste("NG =", df_int_scores$NG)) + annotate("text", x = 1, y = -0.35, label = paste("DB =", df_int_scores$DB)) + annotate("text", x = 1, y = -0.45, label = paste("SM =", df_int_scores$SM)) + scale_x_continuous( name = unique(df$Drug[1]), breaks = unique(x_z_calculations$conc_num_factor), labels = unique(as.character(x_z_calculations$conc_num)) ) + scale_y_continuous(breaks = c(-0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6)) + theme_publication() p_rf_auc[[i]] <- ggplot( x_z_calculations, aes(conc_num_factor, delta_auc)) + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + coord_cartesian(ylim = c(-6500, 6500)) + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_auc), ymax = 0 + (2 * WT_sd_auc)), alpha = 0.3) + ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) + annotate("text", x = 1, y = 4500, label = paste("ZShift =", round(df_int_scores$z_shift_auc, 2))) + scale_color_discrete(guide = FALSE) + # annotate("text", x = 1, y = 3500, label = paste("Avg ZScore =", round(df_int_scores$avg_zscore_auc, 2))) + annotate("text", x = 1, y = 2500, label = paste("lm ZScore =", round(df_int_scores$z_lm_auc, 2))) + annotate("text", x = 1, y = -2500, label = paste("NG =", df_int_scores$NG)) + annotate("text", x = 1, y = -3500, label = paste("DB =", df_int_scores$DB)) + annotate("text", x = 1, y = -4500, label = paste("SM =", df_int_scores$SM)) + scale_x_continuous( name = unique(df$Drug[1]), breaks = unique(x_z_calculations$conc_num_factor), labels = unique(as.character(x_z_calculations$conc_num)) ) + scale_y_continuous(breaks = c(-6000, -5000, -4000, -3000, -2000, -1000, 0, 1000, 2000, 3000, 4000, 5000, 6000)) + theme_publication() if (i == 1) { x_stats_interaction_all_rf_final <- x_z_calculations } if (i > 1) { x_stats_interaction_all_rf_final <- rbind(x_stats_interaction_all_rf_final, x_z_calculations) } } print("Pass RF ggplot loop") write.csv(x_stats_interaction_all_rf_final, file.path(out_dir, "rf_zscore_calculations.csv"), row.names = FALSE) # Part 5 - Get Zscores for Gene deletion strains # Get total number of genes for the next loop num_genes <- length(unique(df2$OrfRep)) # print(num_genes) # Create the output data.frame containing columns for each deletion strain interaction_scores <- unique(df2["OrfRep"]) # interaction_scores$Gene <- unique(df2$Gene) interaction_scores$Gene <- NA interaction_scores$Raw_Shift_l <- NA interaction_scores$z_shift_l <- NA interaction_scores$lm_Score_l <- NA interaction_scores$z_lm_l <- NA interaction_scores$R_Squared_l <- NA interaction_scores$Sum_z_Score_l <- NA interaction_scores$avg_zscore_l <- NA interaction_scores$Raw_Shift_k <- NA interaction_scores$z_shift_k <- NA interaction_scores$lm_Score_k <- NA interaction_scores$z_lm_k <- NA interaction_scores$R_Squared_k <- NA interaction_scores$Sum_z_Score_k <- NA interaction_scores$avg_zscore_k <- NA interaction_scores$Raw_Shift_r <- NA interaction_scores$z_shift_r <- NA interaction_scores$lm_Score_r <- NA interaction_scores$z_lm_r <- NA interaction_scores$R_Squared_r <- NA interaction_scores$Sum_z_Score_r <- NA interaction_scores$avg_zscore_r <- NA interaction_scores$Raw_Shift_auc <- NA interaction_scores$z_shift_auc <- NA interaction_scores$lm_Score_auc <- NA interaction_scores$z_lm_auc <- NA interaction_scores$R_Squared_auc <- NA interaction_scores$Sum_z_Score_auc <- NA interaction_scores$avg_zscore_auc <- NA interaction_scores$NG <- NA interaction_scores$DB <- NA interaction_scores$SM <- NA for (i in 1:num_genes) { # Get each deletion strain ORF gene_sel <- unique(df2$OrfRep)[i] # Extract only the current deletion strain and its data x_gene_sel <- df2[df2$OrfRep == gene_sel, ] x_stats_interaction <- ddply( x_gene_sel, c("OrfRep", "Gene", "conc_num", "conc_num_factor"), summarise, N = (length(l)), mean_l = mean(l, na.rm = TRUE), median_l = median(l, na.rm = TRUE), sd_l = sd(l, na.rm = TRUE), se_l = sd_l / sqrt(N - 1), mean_k = mean(k, na.rm = TRUE), median_k = median(k, na.rm = TRUE), sd_k = sd(k, na.rm = TRUE), se_k = sd_k / sqrt(N - 1), mean_r = mean(r, na.rm = TRUE), median_r = median(r, na.rm = TRUE), sd_r = sd(r, na.rm = TRUE), se_r = sd_r / sqrt(N - 1), mean_auc = mean(auc, na.rm = TRUE), median_auc = median(auc, na.rm = TRUE), sd_auc = sd(auc, na.rm = TRUE), se_auc = sd_auc / sqrt(N - 1), NG = sum(NG, na.rm = TRUE), DB = sum(DB, na.rm = TRUE), SM = sum(SM, na.rm = TRUE) ) # Get shift vals # if L is 0, that means the no growth on no drug # if L is NA at 0, that means the spot was removed due to contamination # if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift # otherwise calculate shift at no drug conc if (is.na(x_stats_interaction$mean_l[1]) || x_stats_interaction$mean_l[1] == 0) { x_stats_interaction$Raw_Shift_l <- 0 x_stats_interaction$Raw_Shift_k <- 0 x_stats_interaction$Raw_Shift_r <- 0 x_stats_interaction$Raw_Shift_auc <- 0 x_stats_interaction$z_shift_l <- 0 x_stats_interaction$z_shift_k <- 0 x_stats_interaction$z_shift_r <- 0 x_stats_interaction$z_shift_auc <- 0 } else { x_stats_interaction$Raw_Shift_l <- x_stats_interaction$mean_l[1] - background_l x_stats_interaction$Raw_Shift_k <- x_stats_interaction$mean_k[1] - background_k x_stats_interaction$Raw_Shift_r <- x_stats_interaction$mean_r[1] - background_r x_stats_interaction$Raw_Shift_auc <- x_stats_interaction$mean_auc[1] - background_auc x_stats_interaction$z_shift_l <- x_stats_interaction$Raw_Shift_l[1] / x_stats_by_l$sd[1] x_stats_interaction$z_shift_k <- x_stats_interaction$Raw_Shift_k[1] / x_stats_by_k$sd[1] x_stats_interaction$z_shift_r <- x_stats_interaction$Raw_Shift_r[1] / x_stats_by_r$sd[1] x_stats_interaction$z_shift_auc <- x_stats_interaction$Raw_Shift_auc[1] / x_stats_by_auc$sd[1] } # Get WT vals x_stats_interaction$WT_l <- x_stats_by_l$mean x_stats_interaction$WT_k <- x_stats_by_k$mean x_stats_interaction$WT_r <- x_stats_by_r$mean x_stats_interaction$WT_auc <- x_stats_by_auc$mean # Get WT SD x_stats_interaction$WT_sd_l <- x_stats_by_l$sd x_stats_interaction$WT_sd_k <- x_stats_by_k$sd x_stats_interaction$WT_sd_r <- x_stats_by_r$sd x_stats_interaction$WT_sd_auc <- x_stats_by_auc$sd # Only get scores if there's growth at no drug if (x_stats_interaction$mean_l[1] != 0 && !is.na(x_stats_interaction$mean_l[1])) { # Calculate expected values x_stats_interaction$Exp_l <- x_stats_interaction$WT_l + x_stats_interaction$Raw_Shift_l x_stats_interaction$Exp_k <- x_stats_interaction$WT_k + x_stats_interaction$Raw_Shift_k x_stats_interaction$Exp_r <- x_stats_interaction$WT_r + x_stats_interaction$Raw_Shift_r x_stats_interaction$Exp_auc <- x_stats_interaction$WT_auc + x_stats_interaction$Raw_Shift_auc # Calculate normalized delta values x_stats_interaction$delta_l <- x_stats_interaction$mean_l - x_stats_interaction$Exp_l x_stats_interaction$delta_k <- x_stats_interaction$mean_k - x_stats_interaction$Exp_k x_stats_interaction$delta_r <- x_stats_interaction$mean_r - x_stats_interaction$Exp_r x_stats_interaction$delta_auc <- x_stats_interaction$mean_auc - x_stats_interaction$Exp_auc # Disregard shift for no growth values in Z score calculation if (sum(x_stats_interaction$NG, na.rm = TRUE) > 0) { x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_l <- x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_l x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_k <- x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_k x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_r <- x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_r x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_auc <- x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_auc } # Disregard shift for set to max values in Z score calculation if (sum(x_stats_interaction$SM, na.rm = TRUE) > 0) { x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_l <- x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_l # Only calculate the L value without shift since L is the only adjusted value # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_k <- # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_k # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_r <- # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_r # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_auc <- # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_auc } # Calculate Z score at each concentration x_stats_interaction$zscore_l <- (x_stats_interaction$delta_l) / (x_stats_interaction$WT_sd_l) x_stats_interaction$zscore_k <- (x_stats_interaction$delta_k) / (x_stats_interaction$WT_sd_k) x_stats_interaction$zscore_r <- (x_stats_interaction$delta_r) / (x_stats_interaction$WT_sd_r) x_stats_interaction$zscore_auc <- (x_stats_interaction$delta_auc) / (x_stats_interaction$WT_sd_auc) # Get linear model gene_lm_l <- lm(formula = delta_l ~ conc_num_factor, data = x_stats_interaction) gene_lm_k <- lm(formula = delta_k ~ conc_num_factor, data = x_stats_interaction) gene_lm_r <- lm(formula = delta_r ~ conc_num_factor, data = x_stats_interaction) gene_lm_auc <- lm(formula = delta_auc ~ conc_num_factor, data = x_stats_interaction) # Get interaction score calculated by linear model and R-squared value for the fit gene_interaction_l <- max_conc * (gene_lm_l$coefficients[2]) + gene_lm_l$coefficients[1] r_squared_l <- summary(gene_lm_l)$r.squared gene_interaction_k <- max_conc * (gene_lm_k$coefficients[2]) + gene_lm_k$coefficients[1] r_squared_k <- summary(gene_lm_k)$r.squared gene_interaction_r <- max_conc * (gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] r_squared_r <- summary(gene_lm_r)$r.squared gene_interaction_auc <- max_conc * (gene_lm_r$coefficients[2]) + gene_lm_auc$coefficients[1] r_squared_auc <- summary(gene_lm_auc)$r.squared # Get total of non removed values num_non_removed_conc <- total_conc_nums - sum(x_stats_interaction$DB, na.rm = TRUE) - 1 # Report the scores interaction_scores$OrfRep[interaction_scores$OrfRep == gene_sel] <- as.character(x_gene_sel$OrfRep[1]) interaction_scores$Gene[interaction_scores$OrfRep == gene_sel] <- as.character(x_gene_sel$Gene[1]) interaction_scores$Raw_Shift_l[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_l[1] interaction_scores$z_shift_l[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$z_shift_l[1] interaction_scores$lm_Score_l[interaction_scores$OrfRep == gene_sel] <- gene_interaction_l interaction_scores$z_lm_l[interaction_scores$OrfRep == gene_sel] <- (gene_interaction_l - lm_mean_l) / lm_sd_l interaction_scores$R_Squared_l[interaction_scores$OrfRep == gene_sel] <- r_squared_l interaction_scores$Sum_z_Score_l[interaction_scores$OrfRep == gene_sel] <- sum(x_stats_interaction$zscore_l, na.rm = TRUE) interaction_scores$avg_zscore_l[interaction_scores$OrfRep == gene_sel] <- sum(x_stats_interaction$zscore_l, na.rm = TRUE) / (num_non_removed_conc) interaction_scores$Raw_Shift_k[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_k[1] interaction_scores$z_shift_k[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$z_shift_k[1] interaction_scores$lm_Score_k[interaction_scores$OrfRep == gene_sel] <- gene_interaction_k interaction_scores$z_lm_k[interaction_scores$OrfRep == gene_sel] <- (gene_interaction_k - lm_mean_k) / lm_sd_k interaction_scores$R_Squared_k[interaction_scores$OrfRep == gene_sel] <- r_squared_k interaction_scores$Sum_z_Score_k[interaction_scores$OrfRep == gene_sel] <- sum(x_stats_interaction$zscore_k, na.rm = TRUE) interaction_scores$avg_zscore_k[interaction_scores$OrfRep == gene_sel] <- sum(x_stats_interaction$zscore_k, na.rm = TRUE) / (num_non_removed_conc) interaction_scores$Raw_Shift_r[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_r[1] interaction_scores$z_shift_r[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$z_shift_r[1] interaction_scores$lm_Score_r[interaction_scores$OrfRep == gene_sel] <- gene_interaction_r interaction_scores$z_lm_r[interaction_scores$OrfRep == gene_sel] <- (gene_interaction_r - lm_mean_r) / lm_sd_r interaction_scores$R_Squared_r[interaction_scores$OrfRep == gene_sel] <- r_squared_r interaction_scores$Sum_z_Score_r[interaction_scores$OrfRep == gene_sel] <- sum(x_stats_interaction$zscore_r, na.rm = TRUE) interaction_scores$avg_zscore_r[interaction_scores$OrfRep == gene_sel] <- sum(x_stats_interaction$zscore_r, na.rm = TRUE) / (total_conc_nums - 1) interaction_scores$Raw_Shift_auc[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_auc[1] interaction_scores$z_shift_auc[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$z_shift_auc[1] interaction_scores$lm_Score_auc[interaction_scores$OrfRep == gene_sel] <- gene_interaction_auc interaction_scores$z_lm_auc[interaction_scores$OrfRep == gene_sel] <- (gene_interaction_auc - lm_mean_auc) / lm_sd_auc interaction_scores$R_Squared_auc[interaction_scores$OrfRep == gene_sel] <- r_squared_auc interaction_scores$Sum_z_Score_auc[interaction_scores$OrfRep == gene_sel] <- sum(x_stats_interaction$zscore_auc, na.rm = TRUE) interaction_scores$avg_zscore_auc[interaction_scores$OrfRep == gene_sel] <- sum(x_stats_interaction$zscore_auc, na.rm = TRUE) / (total_conc_nums - 1) } if (x_stats_interaction$mean_l[1] == 0 || is.na(x_stats_interaction$mean_l[1])) { # Calculate expected values x_stats_interaction$Exp_l <- x_stats_interaction$WT_l + x_stats_interaction$Raw_Shift_l x_stats_interaction$Exp_k <- x_stats_interaction$WT_k + x_stats_interaction$Raw_Shift_k x_stats_interaction$Exp_r <- x_stats_interaction$WT_r + x_stats_interaction$Raw_Shift_r x_stats_interaction$Exp_auc <- x_stats_interaction$WT_auc + x_stats_interaction$Raw_Shift_auc # Calculate normalized delta values x_stats_interaction$delta_l <- x_stats_interaction$mean_l - x_stats_interaction$Exp_l x_stats_interaction$delta_k <- x_stats_interaction$mean_k - x_stats_interaction$Exp_k x_stats_interaction$delta_r <- x_stats_interaction$mean_r - x_stats_interaction$Exp_r x_stats_interaction$delta_auc <- x_stats_interaction$mean_auc - x_stats_interaction$Exp_auc # Disregard shift for missing values in Z score calculatiom if (sum(x_stats_interaction$NG, na.rm = TRUE) > 0) { x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_l <- x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_l x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_k <- x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_k x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_r <- x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_r x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_auc <- x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_auc } # Disregard shift for set to max values in Z score calculation if (sum(x_stats_interaction$SM, na.rm = TRUE) > 0) { x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_l <- x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_l # Only calculate the L value without shift since L is the only adjusted value # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_k <- # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_k # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_r <- # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_r # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_auc <- # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_auc } # Calculate Z score at each concentration x_stats_interaction$zscore_l <- (x_stats_interaction$delta_l) / (x_stats_interaction$WT_sd_l) x_stats_interaction$zscore_k <- (x_stats_interaction$delta_k) / (x_stats_interaction$WT_sd_k) x_stats_interaction$zscore_r <- (x_stats_interaction$delta_r) / (x_stats_interaction$WT_sd_r) x_stats_interaction$zscore_auc <- (x_stats_interaction$delta_auc) / (x_stats_interaction$WT_sd_auc) # NA values for the next part since there's an NA or 0 at the no drug. gene_lm_l <- NA gene_lm_k <- NA gene_lm_r <- NA gene_interaction_l <- NA r_squared_l <- NA gene_interaction_k <- NA r_squared_k <- NA gene_interaction_r <- NA r_squared_r <- NA x_stats_interaction$Raw_Shift_l <- NA x_stats_interaction$Raw_Shift_k <- NA x_stats_interaction$Raw_Shift_r <- NA x_stats_interaction$Raw_Shift_auc <- NA x_stats_interaction$z_shift_l <- NA x_stats_interaction$z_shift_k <- NA x_stats_interaction$z_shift_r <- NA x_stats_interaction$z_shift_auc <- NA interaction_scores$OrfRep[interaction_scores$OrfRep == gene_sel] <- as.character(x_gene_sel$OrfRep[1]) interaction_scores$Gene[interaction_scores$OrfRep == gene_sel] <- as.character(x_gene_sel$Gene[1]) interaction_scores$Raw_Shift_l[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_l[1] interaction_scores$z_shift_l[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$z_shift_l[1] interaction_scores$lm_Score_l[interaction_scores$OrfRep == gene_sel] <- NA interaction_scores$z_lm_l[interaction_scores$OrfRep == gene_sel] <- NA interaction_scores$R_Squared_l[interaction_scores$OrfRep == gene_sel] <- r_squared_l interaction_scores$Sum_z_Score_l[interaction_scores$OrfRep == gene_sel] <- NA interaction_scores$avg_zscore_l[interaction_scores$OrfRep == gene_sel] <- NA interaction_scores$Raw_Shift_k[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_k[1] interaction_scores$z_shift_k[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$z_shift_k[1] interaction_scores$lm_Score_k[interaction_scores$OrfRep == gene_sel] <- NA interaction_scores$z_lm_k[interaction_scores$OrfRep == gene_sel] <- NA interaction_scores$R_Squared_k[interaction_scores$OrfRep == gene_sel] <- r_squared_k interaction_scores$Sum_z_Score_k[interaction_scores$OrfRep == gene_sel] <- NA interaction_scores$avg_zscore_k[interaction_scores$OrfRep == gene_sel] <- NA interaction_scores$Raw_Shift_r[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_r[1] interaction_scores$z_shift_r[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$z_shift_r[1] interaction_scores$lm_Score_r[interaction_scores$OrfRep == gene_sel] <- NA interaction_scores$z_lm_r[interaction_scores$OrfRep == gene_sel] <- NA interaction_scores$R_Squared_r[interaction_scores$OrfRep == gene_sel] <- r_squared_r interaction_scores$Sum_z_Score_r[interaction_scores$OrfRep == gene_sel] <- NA interaction_scores$avg_zscore_r[interaction_scores$OrfRep == gene_sel] <- NA interaction_scores$Raw_Shift_auc[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_auc[1] interaction_scores$z_shift_auc[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$z_shift_auc[1] interaction_scores$lm_Score_auc[interaction_scores$OrfRep == gene_sel] <- NA interaction_scores$z_lm_auc[interaction_scores$OrfRep == gene_sel] <- NA interaction_scores$R_Squared_auc[interaction_scores$OrfRep == gene_sel] <- r_squared_auc interaction_scores$Sum_z_Score_auc[interaction_scores$OrfRep == gene_sel] <- NA interaction_scores$avg_zscore_auc[interaction_scores$OrfRep == gene_sel] <- NA } if (i == 1) { x_stats_interaction_all <- x_stats_interaction } if (i > 1) { x_stats_interaction_all <- rbind(x_stats_interaction_all, x_stats_interaction) } interaction_scores$NG[interaction_scores$OrfRep == gene_sel] <- sum(x_stats_interaction$NG, na.rm = TRUE) interaction_scores$DB[interaction_scores$OrfRep == gene_sel] <- sum(x_stats_interaction$DB, na.rm = TRUE) interaction_scores$SM[interaction_scores$OrfRep == gene_sel] <- sum(x_stats_interaction$SM, na.rm = TRUE) # x_stats_l_int_temp <- rbind(x_stats_l_int_temp, x_stats_l_int) } print("Pass Int Calculation loop") interaction_scores <- interaction_scores[order(interaction_scores$z_lm_l, decreasing = TRUE), ] interaction_scores <- interaction_scores[order(interaction_scores$NG, decreasing = TRUE), ] df_order_by_OrfRep <- unique(interaction_scores$OrfRep) # x_stats_interaction_all <- x_stats_interaction_all[order(x_stats_interaction_all$NG, decreasing = TRUE), ] write.csv(interaction_scores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE) interaction_scores_deletion_enhancers_l <- interaction_scores[interaction_scores$avg_zscore_l >= 2, ] interaction_scores_deletion_enhancers_k <- interaction_scores[interaction_scores$avg_zscore_k <= -2, ] interaction_scores_deletion_suppressors_l <- interaction_scores[interaction_scores$avg_zscore_l <= -2, ] interaction_scores_deletion_suppressors_k <- interaction_scores[interaction_scores$avg_zscore_k >= 2, ] interaction_scores_deletion_enhancers_and_suppressors_l <- interaction_scores[interaction_scores$avg_zscore_l >= 2 | interaction_scores$avg_zscore_l <= -2, ] interaction_scores_deletion_enhancers_and_suppressors_k <- interaction_scores[interaction_scores$avg_zscore_k >= 2 | interaction_scores$avg_zscore_k <= -2, ] interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_l <- interaction_scores[interaction_scores$z_lm_l >= 2 & interaction_scores$avg_zscore_l <= -2, ] interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_l <- interaction_scores[interaction_scores$z_lm_l <= -2 & interaction_scores$avg_zscore_l >= 2, ] interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_k <- interaction_scores[interaction_scores$z_lm_k <= -2 & interaction_scores$avg_zscore_k >= 2, ] interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_k <- interaction_scores[interaction_scores$z_lm_k >= 2 & interaction_scores$avg_zscore_k <= -2, ] interaction_scores_deletion_enhancers_l <- interaction_scores_deletion_enhancers_l[ !is.na(interaction_scores_deletion_enhancers_l$OrfRep), ] interaction_scores_deletion_enhancers_k <- interaction_scores_deletion_enhancers_k[ !is.na(interaction_scores_deletion_enhancers_k$OrfRep), ] interaction_scores_deletion_suppressors_l <- interaction_scores_deletion_suppressors_l[ !is.na(interaction_scores_deletion_suppressors_l$OrfRep), ] interaction_scores_deletion_suppressors_k <- interaction_scores_deletion_suppressors_k[ !is.na(interaction_scores_deletion_suppressors_k$OrfRep), ] interaction_scores_deletion_enhancers_and_suppressors_l <- interaction_scores_deletion_enhancers_and_suppressors_l[ !is.na(interaction_scores_deletion_enhancers_and_suppressors_l$OrfRep), ] interaction_scores_deletion_enhancers_and_suppressors_k <- interaction_scores_deletion_enhancers_and_suppressors_k[ !is.na(interaction_scores_deletion_enhancers_and_suppressors_k$OrfRep), ] interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_l <- interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_l[ !is.na(interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_l$OrfRep), ] interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_l <- interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_l[ !is.na(interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_l$OrfRep), ] interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_k <- interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_k[ !is.na(interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_k$OrfRep), ] interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_k <- interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_k[ !is.na(interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_k$OrfRep), ] write.csv(interaction_scores_deletion_enhancers_l, file.path(out_dir, "zscores_interaction_deletion_enhancers_l.csv"), row.names = FALSE) write.csv(interaction_scores_deletion_enhancers_k, file.path(out_dir, "zscores_interaction_deletion_enhancers_k.csv"), row.names = FALSE) write.csv(interaction_scores_deletion_suppressors_l, file.path(out_dir, "zscores_interaction_deletion_suppressors_l.csv"), row.names = FALSE) write.csv(interaction_scores_deletion_suppressors_k, file.path(out_dir, "zscores_interaction_deletion_suppressors_k.csv"), row.names = FALSE) write.csv(interaction_scores_deletion_enhancers_and_suppressors_l, file.path(out_dir, "zscores_interaction_deletion_enhancers_and_suppressors_l.csv"), row.names = FALSE) write.csv(interaction_scores_deletion_enhancers_and_suppressors_k, file.path(out_dir, "zscores_interaction_deletion_enhancers_and_suppressors_k.csv"), row.names = FALSE) write.csv(interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_l, file.path(out_dir, "zscores_interaction_suppressors_and_lm_enhancers_l.csv"), row.names = FALSE) write.csv(interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_l, file.path(out_dir, "zscores_interaction_enhancers_and_lm_suppressors_l.csv"), row.names = FALSE) write.csv(interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_k, file.path(out_dir, "zscores_interaction_suppressors_and_lm_enhancers_k.csv"), row.names = FALSE) write.csv(interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_k, file.path(out_dir, "zscores_interaction_enhancers_and_lm_suppressors_k.csv"), row.names = FALSE) # Get enhancers and suppressors for linear regression interaction_scores_deletion_enhancers_l_lm <- interaction_scores[interaction_scores$z_lm_l >= 2, ] interaction_scores_deletion_enhancers_k_lm <- interaction_scores[interaction_scores$z_lm_k <= -2, ] interaction_scores_deletion_suppressors_l_lm <- interaction_scores[interaction_scores$z_lm_l <= -2, ] interaction_scores_deletion_suppressors_k_lm <- interaction_scores[interaction_scores$z_lm_k >= 2, ] interaction_scores_deletion_enhancers_and_suppressors_l_lm <- interaction_scores[interaction_scores$z_lm_l >= 2 | interaction_scores$z_lm_l <= -2, ] interaction_scores_deletion_enhancers_and_suppressors_k_lm <- interaction_scores[interaction_scores$z_lm_k >= 2 | interaction_scores$z_lm_k <= -2, ] interaction_scores_deletion_enhancers_l_lm <- interaction_scores_deletion_enhancers_l_lm[ !is.na(interaction_scores_deletion_enhancers_l_lm$OrfRep), ] interaction_scores_deletion_enhancers_k_lm <- interaction_scores_deletion_enhancers_k_lm[ !is.na(interaction_scores_deletion_enhancers_k_lm$OrfRep), ] interaction_scores_deletion_suppressors_l_lm <- interaction_scores_deletion_suppressors_l_lm[ !is.na(interaction_scores_deletion_suppressors_l_lm$OrfRep), ] interaction_scores_deletion_suppressors_k_lm <- interaction_scores_deletion_suppressors_k_lm[ !is.na(interaction_scores_deletion_suppressors_k_lm$OrfRep), ] interaction_scores_deletion_enhancers_and_suppressors_l_lm <- interaction_scores_deletion_enhancers_and_suppressors_l_lm[ !is.na(interaction_scores_deletion_enhancers_and_suppressors_l_lm$OrfRep), ] interaction_scores_deletion_enhancers_and_suppressors_k_lm <- interaction_scores_deletion_enhancers_and_suppressors_k_lm[ !is.na(interaction_scores_deletion_enhancers_and_suppressors_k_lm$OrfRep), ] write.csv(interaction_scores_deletion_enhancers_l_lm, file.path(out_dir, "zscores_interaction_deletion_enhancers_l_lm.csv"), row.names = FALSE) write.csv(interaction_scores_deletion_enhancers_k_lm, file.path(out_dir, "zscores_interaction_deletion_enhancers_k_lm.csv"), row.names = FALSE) write.csv(interaction_scores_deletion_suppressors_l_lm, file.path(out_dir, "zscores_interaction_deletion_suppressors_l_lm.csv"), row.names = FALSE) write.csv(interaction_scores_deletion_suppressors_k_lm, file.path(out_dir, "zscores_interaction_deletion_suppressors_k_lm.csv"), row.names = FALSE) write.csv(interaction_scores_deletion_enhancers_and_suppressors_l_lm, file.path(out_dir, "zscores_interaction_deletion_enhancers_and_suppressors_l_lm.csv"), row.names = FALSE) write.csv(interaction_scores_deletion_enhancers_and_suppressors_k_lm, file.path(out_dir, "zscores_interaction_deletion_enhancers_and_suppressors_k_lm.csv"), row.names = FALSE) # write.csv(Labels, study_info_file, row.names = FALSE) # write.table(Labels, file.path("../Code/StudyInfo.txt"), sep = "\t", row.names = FALSE) for (i in 1:num_genes) { gene_sel <- unique(interaction_scores$OrfRep)[i] x_z_calculations <- x_stats_interaction_all[x_stats_interaction_all$OrfRep == gene_sel, ] df_int_scores <- interaction_scores[interaction_scores$OrfRep == gene_sel, ] p_l[[i]] <- ggplot(x_z_calculations, aes(conc_num_factor, delta_l)) + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + coord_cartesian(ylim = c(-65, 65)) + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_l), ymax = 0 + (2 * WT_sd_l)), alpha = 0.3) + ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) + annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$z_shift_l, 2))) + scale_color_discrete(guide = FALSE) + # annotate("text", x = 1, y = 35, label = paste("Avg ZScore =", round(df_int_scores$avg_zscore_l, 2))) + annotate("text", x = 1, y = 25, label = paste("Z lm Score =", round(df_int_scores$z_lm_l, 2))) + annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) + annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) + annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) + scale_x_continuous( name = unique(df$Drug[1]), breaks = unique(x_z_calculations$conc_num_factor), labels = unique(as.character(x_z_calculations$conc_num))) + scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) + theme_publication() p_k[[i]] <- ggplot(x_z_calculations, aes(conc_num_factor, delta_k)) + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + coord_cartesian(ylim = c(-65, 65)) + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_k), ymax = 0 + (2 * WT_sd_k)), alpha = 0.3) + ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) + annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$z_shift_k, 2))) + scale_color_discrete(guide = FALSE) + # annotate("text", x = 1, y = 35, label = paste("Avg ZScore =", round(df_int_scores$avg_zscore_k, 2))) + annotate("text", x = 1, y = 25, label = paste("Z lm Score =", round(df_int_scores$z_lm_k, 2))) + annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) + annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) + annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) + scale_x_continuous( name = unique(df$Drug[1]), breaks = unique(x_z_calculations$conc_num_factor), labels = unique(as.character(x_z_calculations$conc_num))) + scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) + theme_publication() p_r[[i]] <- ggplot(x_z_calculations, aes(conc_num_factor, delta_r)) + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + coord_cartesian(ylim = c(-0.65, 0.65)) + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_r), ymax = 0 + (2 * WT_sd_r)), alpha = 0.3) + ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) + annotate("text", x = 1, y = 0.45, label = paste("ZShift =", round(df_int_scores$z_shift_r, 2))) + scale_color_discrete(guide = FALSE) + # annotate("text", x = 1, y = 0.35, label = paste("Avg ZScore =", round(df_int_scores$avg_zscore_r, 2))) + annotate("text", x = 1, y = 0.25, label = paste("Z lm Score =", round(df_int_scores$z_lm_r, 2))) + annotate("text", x = 1, y = -0.25, label = paste("NG =", df_int_scores$NG)) + annotate("text", x = 1, y = -0.35, label = paste("DB =", df_int_scores$DB)) + annotate("text", x = 1, y = -0.45, label = paste("SM =", df_int_scores$SM)) + scale_x_continuous( name = unique(df$Drug[1]), breaks = unique(x_z_calculations$conc_num_factor), labels = unique(as.character(x_z_calculations$conc_num))) + scale_y_continuous(breaks = c(-0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6)) + theme_publication() p_auc[[i]] <- ggplot(x_z_calculations, aes(conc_num_factor, delta_auc)) + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + coord_cartesian(ylim = c(-6500, 6500)) + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_auc), ymax = 0 + (2 * WT_sd_auc)), alpha = 0.3) + ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) + annotate("text", x = 1, y = 4500, label = paste("ZShift =", round(df_int_scores$z_shift_auc, 2))) + scale_color_discrete(guide = FALSE) + # annotate("text", x = 1, y = 3500, label = paste("Avg ZScore =", round(df_int_scores$avg_zscore_auc, 2))) + annotate("text", x = 1, y = 2500, label = paste("Z lm Score =", round(df_int_scores$z_lm_auc, 2))) + annotate("text", x = 1, y = -2500, label = paste("NG =", df_int_scores$NG)) + annotate("text", x = 1, y = -3500, label = paste("DB =", df_int_scores$DB)) + annotate("text", x = 1, y = -4500, label = paste("SM =", df_int_scores$SM)) + scale_x_continuous( name = unique(df$Drug[1]), breaks = unique(x_z_calculations$conc_num_factor), labels = unique(as.character(x_z_calculations$conc_num))) + scale_y_continuous(breaks = c(-6000, -5000, -4000, -3000, -2000, -1000, 0, 1000, 2000, 3000, 4000, 5000, 6000)) + theme_publication() if (i == 1) { x_stats_interaction_all_final <- x_z_calculations } if (i > 1) { x_stats_interaction_all_final <- rbind(x_stats_interaction_all_final, x_z_calculations) } } print("Pass Int ggplot loop") write.csv(x_stats_interaction_all_final, file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE) blank <- ggplot(df2_rf) + geom_blank() pdf(file.path(out_dir, "interaction_plots.pdf"), width = 16, height = 16, onefile = TRUE) x_stats_df2_rf <- ddply( df2_rf, c("conc_num", "conc_num_factor"), summarise, mean_l = mean(l, na.rm = TRUE), median_l = median(l, na.rm = TRUE), max_l = max(l, na.rm = TRUE), min_l = min(l, na.rm = TRUE), sd_l = sd(l, na.rm = TRUE), mean_k = mean(k, na.rm = TRUE), median_k = median(k, na.rm = TRUE), max_k = max(k, na.rm = TRUE), min_k = min(k, na.rm = TRUE), sd_k = sd(k, na.rm = TRUE), mean_r = mean(r, na.rm = TRUE), median_r = median(r, na.rm = TRUE), max_r = max(r, na.rm = TRUE), min_r = min(r, na.rm = TRUE), sd_r = sd(r, na.rm = TRUE), mean_auc = mean(auc, na.rm = TRUE), median_auc = median(auc, na.rm = TRUE), max_auc = max(auc, na.rm = TRUE), min_auc = min(auc, na.rm = TRUE), sd_auc = sd(auc, na.rm = TRUE), NG = sum(NG, na.rm = TRUE), DB = sum(DB, na.rm = TRUE), SM = sum(SM, na.rm = TRUE) ) l_stats <- ggplot(df2_rf, aes(conc_num_factor, l)) + geom_point(position = "jitter", size = 1) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar", color = "red") + stat_summary(fun = mean, geom = "point", color = "red") + scale_x_continuous( name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num)) ) + ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim = c(0, 160)) + annotate("text", x = -0.25, y = 10, label = "NG") + annotate("text", x = -0.25, y = 5, label = "DB") + annotate("text", x = -0.25, y = 0, label = "SM") + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 10, label = x_stats_df2_rf$NG) + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 5, label = x_stats_df2_rf$DB) + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 0, label = x_stats_df2_rf$SM) + theme_publication() k_stats <- ggplot(df2_rf, aes(conc_num_factor, k)) + geom_point(position = "jitter", size = 1) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar", color = "red") + stat_summary(fun = mean, geom = "point", color = "red") + scale_x_continuous( name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num)) ) + ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim = c(-20, 160)) + annotate("text", x = -0.25, y = -5, label = "NG") + annotate("text", x = -0.25, y = -12.5, label = "DB") + annotate("text", x = -0.25, y = -20, label = "SM") + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = -5, label = x_stats_df2_rf$NG) + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = -12.5, label = x_stats_df2_rf$DB) + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = -20, label = x_stats_df2_rf$SM) + theme_publication() r_stats <- ggplot(df2_rf, aes(conc_num_factor, r)) + geom_point(position = "jitter", size = 1) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar", color = "red") + stat_summary(fun = mean, geom = "point", color = "red") + scale_x_continuous( name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num)) ) + ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim = c(0, 1)) + annotate("text", x = -0.25, y = .9, label = "NG") + annotate("text", x = -0.25, y = .8, label = "DB") + annotate("text", x = -0.25, y = .7, label = "SM") + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = .9, label = x_stats_df2_rf$NG) + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = .8, label = x_stats_df2_rf$DB) + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = .7, label = x_stats_df2_rf$SM) + theme_publication() auc_stats <- ggplot(df2_rf, aes(conc_num_factor, auc)) + geom_point(position = "jitter", size = 1) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar", color = "red") + stat_summary(fun = mean, geom = "point", color = "red") + scale_x_continuous( name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num)) ) + ggtitle(paste(s, "Scatter RF for auc with SD", sep = " ")) + coord_cartesian(ylim = c(0, 12500)) + annotate("text", x = -0.25, y = 11000, label = "NG") + annotate("text", x = -0.25, y = 10000, label = "DB") + annotate("text", x = -0.25, y = 9000, label = "SM") + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 11000, label = x_stats_df2_rf$NG) + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 10000, label = x_stats_df2_rf$DB) + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 9000, label = x_stats_df2_rf$SM) + theme_publication() l_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), l)) + geom_boxplot() + scale_x_discrete( name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num)) ) + ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim = c(0, 160)) + theme_publication() k_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), k)) + geom_boxplot() + scale_x_discrete( name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num)) ) + ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim = c(0, 130)) + theme_publication() r_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), r)) + geom_boxplot() + scale_x_discrete( name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num)) ) + ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim = c(0, 1)) + theme_publication() auc_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), auc)) + geom_boxplot() + scale_x_discrete( name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num)) ) + ggtitle(paste(s, "Scatter RF for auc with SD", sep = " ")) + coord_cartesian(ylim = c(0, 12500)) + theme_publication() grid.arrange(l_stats, k_stats, r_stats, auc_stats, ncol = 2, nrow = 2) grid.arrange(l_stats_box, k_stats_box, r_stats_box, auc_stats_box, ncol = 2, nrow = 2) # Plot the references # grid.arrange(p3, p3_k, p3_r, p4, p4_k, p4_r, p5, p5_k, p5_r, p6, p6_k, p6_r, ncol = 3, nrow = 4) # grid.arrange(p5, p5_k, p5_r, p6, p6_k, p6_r, ncol = 3, nrow = 2) # Loop for grid arrange 4x3 j <- rep(1, ((num_genes) / 3) - 1) for (n in 1:length(j)) { j[n + 1] <- n * 3 + 1 } # Loop for printing each plot num <- 0 for (m in 1:(round((num_genes) / 3) - 1)) { num <- j[m] grid.arrange(p_l[[num]], p_k[[num]], p_r[[num]], p_auc[[num]], p_l[[num + 1]], p_k[[num + 1]], p_r[[num + 1]], p_auc[[num + 1]], p_l[[num + 2]], p_k[[num + 2]], p_r[[num + 2]], p_auc[[num + 2]], ncol = 4, nrow = 3) # grid.arrange(p_l[[364]], p_k[[364]], p_r[[364]], # p_l[[365]], p_k[[365]], p_r[[365]], p_l[[366]], p_k[[366]], p_r[[366]], ncol = 3, nrow = 3) # p1[[num + 3]], p_k[[num + 3]], p_r[[num + 3]], p1[[num + 4]], p_k[[num + 4]], p_r[[num + 4]] } if (num_genes != (num + 2)) { total_num <- num_genes - (num + 2) if (total_num == 5) { grid.arrange(p_l[[num + 3]], p_k[[num + 3]], p_r[[num + 3]], p_auc[[num + 3]], p_l[[num + 4]], p_k[[num + 4]], p_r[[num + 4]], p_auc[[num + 4]], p_l[[num + 5]], p_k[[num + 5]], p_r[[num + 5]], p_auc[[num + 5]], ncol = 4, nrow = 3) grid.arrange(p_l[[num + 6]], p_k[[num + 6]], p_r[[num + 6]], p_auc[[num + 6]], p_l[[num + 7]], p_k[[num + 7]], p_r[[num + 7]], p_auc[[num + 7]], blank, blank, blank, blank, ncol = 4, nrow = 3) } if (total_num == 4) { grid.arrange(p_l[[num + 3]], p_k[[num + 3]], p_r[[num + 3]], p_auc[[num + 3]], p_l[[num + 4]], p_k[[num + 4]], p_r[[num + 4]], p_auc[[num + 4]], p_l[[num + 5]], p_k[[num + 5]], p_r[[num + 5]], p_auc[[num + 5]], ncol = 4, nrow = 3) grid.arrange(p_l[[num + 6]], p_k[[num + 6]], p_r[[num + 6]], p_auc[[num + 6]], blank, blank, blank, blank, blank, blank, blank, blank, ncol = 4, nrow = 3) } if (total_num == 3) { grid.arrange(p_l[[num + 3]], p_k[[num + 3]], p_r[[num + 3]], p_auc[[num + 3]], p_l[[num + 4]], p_k[[num + 4]], p_r[[num + 4]], p_auc[[num + 4]], p_l[[num + 5]], p_k[[num + 5]], p_r[[num + 5]], p_auc[[num + 5]], ncol = 4, nrow = 3) # grid.arrange(p_l[[num + 6]], p_k[[num + 6]], p_r[[num + 6]], # p_l[[num + 7]], p_k[[num + 7]], p_r[[num + 7]], blank, blank, blank, ncol = 3, nrow = 3) } if (total_num == 2) { grid.arrange(p_l[[num + 3]], p_k[[num + 3]], p_r[[num + 3]], p_auc[[num + 3]], p_l[[num + 4]], p_k[[num + 4]], p_r[[num + 4]], p_auc[[num + 4]], blank, blank, blank, blank, ncol = 4, nrow = 3) # grid.arrange(p_l[[num + 6]], p_k[[num + 6]], p_r[[num + 6]], # p_l[[num + 7]], p_k[[num + 7]], p_r[[num + 7]], blank, blank, blank, ncol = 3, nrow = 3) } if (total_num == 1) { grid.arrange(p_l[[num + 3]], p_k[[num + 3]], p_r[[num + 3]], p_auc[[num + 3]], blank, blank, blank, blank, blank, blank, blank, blank, ncol = 4, nrow = 3) # grid.arrange(p_l[[num + 6]], p_k[[num + 6]], p_r[[num + 6]], # p_l[[num + 7]], p_k[[num + 7]], p_r[[num + 7]], blank, blank, blank, ncol = 3, nrow = 3) } } dev.off() pdf(file.path(out_dir, "rf_interaction_plots.pdf"), width = 16, height = 16, onefile = TRUE) x_stats_df2_rf <- ddply( df2_rf, c("conc_num", "conc_num_factor"), summarise, mean_l = mean(l, na.rm = TRUE), median_l = median(l, na.rm = TRUE), max_l = max(l, na.rm = TRUE), min_l = min(l, na.rm = TRUE), sd_l = sd(l, na.rm = TRUE), mean_k = mean(k, na.rm = TRUE), median_k = median(k, na.rm = TRUE), max_k = max(k, na.rm = TRUE), min_k = min(k, na.rm = TRUE), sd_k = sd(k, na.rm = TRUE), mean_r = mean(r, na.rm = TRUE), median_r = median(r, na.rm = TRUE), max_r = max(r, na.rm = TRUE), min_r = min(r, na.rm = TRUE), sd_r = sd(r, na.rm = TRUE), mean_auc = mean(auc, na.rm = TRUE), median_auc = median(auc, na.rm = TRUE), max_auc = max(auc, na.rm = TRUE), min_auc = min(auc, na.rm = TRUE), sd_auc = sd(auc, na.rm = TRUE), NG = sum(NG, na.rm = TRUE), DB = sum(DB, na.rm = TRUE), SM = sum(SM, na.rm = TRUE) ) l_stats <- ggplot(df2_rf, aes(conc_num_factor, l)) + geom_point(position = "jitter", size = 1) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar", color = "red" ) + stat_summary(fun = mean, geom = "point", color = "red") + scale_x_continuous( name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num)) ) + ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim = c(0, 130)) + annotate("text", x = -0.25, y = 10, label = "NG") + annotate("text", x = -0.25, y = 5, label = "DB") + annotate("text", x = -0.25, y = 0, label = "SM") + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 10, label = x_stats_df2_rf$NG) + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 5, label = x_stats_df2_rf$DB) + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 0, label = x_stats_df2_rf$SM) + theme_publication() k_stats <- ggplot(df2_rf, aes(conc_num_factor, k)) + geom_point(position = "jitter", size = 1) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar", color = "red" ) + stat_summary(fun = mean, geom = "point", color = "red") + scale_x_continuous( name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num)) ) + ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim = c(-20, 160)) + annotate("text", x = -0.25, y = -5, label = "NG") + annotate("text", x = -0.25, y = -12.5, label = "DB") + annotate("text", x = -0.25, y = -20, label = "SM") + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = -5, label = x_stats_df2_rf$NG) + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = -12.5, label = x_stats_df2_rf$DB) + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = -20, label = x_stats_df2_rf$SM) + theme_publication() r_stats <- ggplot(df2_rf, aes(conc_num_factor, r)) + geom_point(position = "jitter", size = 1) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar", color = "red" ) + stat_summary(fun = mean, geom = "point", color = "red") + scale_x_continuous( name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num)) ) + ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim = c(0, 1)) + annotate("text", x = -0.25, y = .9, label = "NG") + annotate("text", x = -0.25, y = .8, label = "DB") + annotate("text", x = -0.25, y = .7, label = "SM") + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = .9, label = x_stats_df2_rf$NG) + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = .8, label = x_stats_df2_rf$DB) + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = .7, label = x_stats_df2_rf$SM) + theme_publication() auc_stats <- ggplot(df2_rf, aes(conc_num_factor, auc)) + geom_point(position = "jitter", size = 1) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar", color = "red" ) + stat_summary(fun = mean, geom = "point", color = "red") + scale_x_continuous( name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num)) ) + ggtitle(paste(s, "Scatter RF for auc with SD", sep = " ")) + coord_cartesian(ylim = c(0, 12500)) + annotate("text", x = -0.25, y = 11000, label = "NG") + annotate("text", x = -0.25, y = 10000, label = "DB") + annotate("text", x = -0.25, y = 9000, label = "SM") + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 11000, label = x_stats_df2_rf$NG) + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 10000, label = x_stats_df2_rf$DB) + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 9000, label = x_stats_df2_rf$SM) + theme_publication() l_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), l)) + geom_boxplot() + scale_x_discrete( name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num)) ) + ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim = c(0, 130)) + theme_publication() k_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), k)) + geom_boxplot() + scale_x_discrete( name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num)) ) + ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim = c(0, 160)) + theme_publication() r_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), r)) + geom_boxplot() + scale_x_discrete( name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num)) ) + ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim = c(0, 1)) + theme_publication() auc_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), auc)) + geom_boxplot() + scale_x_discrete( name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num)) ) + ggtitle(paste(s, "Scatter RF for auc with SD", sep = " ")) + coord_cartesian(ylim = c(12000, 0)) + theme_publication() grid.arrange(l_stats, k_stats, r_stats, auc_stats, ncol = 2, nrow = 2) grid.arrange(l_stats_box, k_stats_box, r_stats_box, auc_stats_box, ncol = 2, nrow = 2) # Plot the references # grid.arrange(p3, p3_k, p3_r, p4, p4_k, p4_r, p5, p5_k, p5_r, p6, p6_k, p6_r, ncol = 3, nrow = 4) # grid.arrange(p5, p5_k, p5_r, p6, p6_k, p6_r, ncol = 3, nrow = 2) # Loop for grid arrange 4x3 j <- rep(1, ((num_genes_rf) / 3) - 1) for (n in 1:length(j)) { j[n + 1] <- n * 3 + 1 } # Loop for printing each plot num <- 0 for (m in 1:(round((num_genes_rf) / 3) - 1)) { num <- j[m] grid.arrange(p_rf_l[[num]], p_rf_k[[num]], p_rf_r[[num]], p_rf_auc[[num]], p_rf_l[[num + 1]], p_rf_k[[num + 1]], p_rf_r[[num + 1]], p_rf_auc[[num + 1]], p_rf_l[[num + 2]], p_rf_k[[num + 2]], p_rf_r[[num + 2]], p_rf_auc[[num + 2]], ncol = 4, nrow = 3) # grid.arrange(p_rf_l[[364]], p_rf_k[[364]], p_rf_r[[364]], # p_rf_l[[365]], p_rf_k[[365]], p_rf_r[[365]], p_rf_l[[366]], p_rf_k[[366]], p_rf_r[[366]], ncol = 3, nrow = 3) # p1[[num + 3]], p_rf_k[[num + 3]], p_rf_r[[num + 3]], p1[[num + 4]], p_rf_k[[num + 4]], p_rf_r[[num + 4]] } if (num_genes_rf != (num + 2)) { total_num <- num_genes_rf - (num + 2) if (total_num == 5) { grid.arrange(p_rf_l[[num + 3]], p_rf_k[[num + 3]], p_rf_r[[num + 3]], p_rf_auc[[num + 3]], p_rf_l[[num + 4]], p_rf_k[[num + 4]], p_rf_r[[num + 4]], p_rf_auc[[num + 4]], p_rf_l[[num + 5]], p_rf_k[[num + 5]], p_rf_r[[num + 5]], p_rf_auc[[num + 5]], ncol = 4, nrow = 3 ) grid.arrange(p_rf_l[[num + 6]], p_rf_k[[num + 6]], p_rf_r[[num + 6]], p_rf_auc[[num + 6]], p_rf_l[[num + 7]], p_rf_k[[num + 7]], p_rf_r[[num + 7]], p_rf_auc[[num + 7]], blank, blank, blank, blank, ncol = 4, nrow = 3 ) } if (total_num == 4) { grid.arrange(p_rf_l[[num + 3]], p_rf_k[[num + 3]], p_rf_r[[num + 3]], p_rf_auc[[num + 3]], p_rf_l[[num + 4]], p_rf_k[[num + 4]], p_rf_r[[num + 4]], p_rf_auc[[num + 4]], p_rf_l[[num + 5]], p_rf_k[[num + 5]], p_rf_r[[num + 5]], p_rf_auc[[num + 5]], ncol = 4, nrow = 3 ) grid.arrange(p_rf_l[[num + 6]], p_rf_k[[num + 6]], p_rf_r[[num + 6]], p_rf_auc[[num + 6]], blank, blank, blank, blank, blank, blank, blank, blank, ncol = 4, nrow = 3 ) } if (total_num == 3) { grid.arrange(p_rf_l[[num + 3]], p_rf_k[[num + 3]], p_rf_r[[num + 3]], p_rf_auc[[num + 3]], p_rf_l[[num + 4]], p_rf_k[[num + 4]], p_rf_r[[num + 4]], p_rf_auc[[num + 4]], p_rf_l[[num + 5]], p_rf_k[[num + 5]], p_rf_r[[num + 5]], p_rf_auc[[num + 5]], ncol = 4, nrow = 3 ) # grid.arrange(p_rf_l[[num + 6]], p_rf_k[[num + 6]], p_rf_r[[num + 6]], # p_rf_l[[num + 7]], p_rf_k[[num + 7]], p_rf_r[[num + 7]], blank, blank, blank, ncol = 3, nrow = 3) } if (total_num == 2) { grid.arrange(p_rf_l[[num + 3]], p_rf_k[[num + 3]], p_rf_r[[num + 3]], p_rf_auc[[num + 3]], p_rf_l[[num + 4]], p_rf_k[[num + 4]], p_rf_r[[num + 4]], p_rf_auc[[num + 4]], blank, blank, blank, blank, ncol = 4, nrow = 3 ) # grid.arrange(p_rf_l[[num + 6]], p_rf_k[[num + 6]], p_rf_r[[num + 6]], # p_rf_l[[num + 7]], p_rf_k[[num + 7]], p_rf_r[[num + 7]], blank, blank, blank, ncol = 3, nrow = 3) } if (total_num == 1) { grid.arrange(p_rf_l[[num + 3]], p_rf_k[[num + 3]], p_rf_r[[num + 3]], p_rf_auc[[num + 3]], blank, blank, blank, blank, blank, blank, blank, blank, ncol = 4, nrow = 3 ) # grid.arrange(p_rf_l[[num + 6]], p_rf_k[[num + 6]], p_rf_r[[num + 6]], # p_rf_l[[num + 7]], p_rf_k[[num + 7]], p_rf_r[[num + 7]], blank, blank, blank, ncol = 3, nrow = 3) } } dev.off() # Print rank plots for L and k gene interactions interaction_scores_adjust_missing <- interaction_scores interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$avg_zscore_l), ]$avg_zscore_l <- 0.001 interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$avg_zscore_k), ]$avg_zscore_k <- 0.001 interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$avg_zscore_r), ]$avg_zscore_r <- 0.001 interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$avg_zscore_auc), ]$avg_zscore_auc <- 0.001 interaction_scores_adjust_missing$l_rank <- NA interaction_scores_adjust_missing$k_rank <- NA interaction_scores_adjust_missing$r_rank <- NA interaction_scores_adjust_missing$auc_rank <- NA interaction_scores_adjust_missing$l_rank <- rank(interaction_scores_adjust_missing$avg_zscore_l) interaction_scores_adjust_missing$k_rank <- rank(interaction_scores_adjust_missing$avg_zscore_k) interaction_scores_adjust_missing$r_rank <- rank(interaction_scores_adjust_missing$avg_zscore_r) interaction_scores_adjust_missing$auc_rank <- rank(interaction_scores_adjust_missing$avg_zscore_auc) interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$z_lm_l), ]$z_lm_l <- 0.001 interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$z_lm_k), ]$z_lm_k <- 0.001 interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$z_lm_r), ]$z_lm_r <- 0.001 interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$z_lm_auc), ]$z_lm_auc <- 0.001 interaction_scores_adjust_missing$l_rank_lm <- NA interaction_scores_adjust_missing$k_rank_lm <- NA interaction_scores_adjust_missing$r_rank_lm <- NA interaction_scores_adjust_missing$auc_rank_lm <- NA interaction_scores_adjust_missing$l_rank_lm <- rank(interaction_scores_adjust_missing$z_lm_l) interaction_scores_adjust_missing$k_rank_lm <- rank(interaction_scores_adjust_missing$z_lm_k) interaction_scores_adjust_missing$r_rank_lm <- rank(interaction_scores_adjust_missing$z_lm_r) interaction_scores_adjust_missing$auc_rank_lm <- rank(interaction_scores_adjust_missing$z_lm_auc) # Rank plots rank_l_1sd <- ggplot(interaction_scores_adjust_missing, aes(l_rank, avg_zscore_l)) + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_l >= 1, ])[1])) + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_l <= -1, ])[1])) + theme_publication() rank_l_2sd <- ggplot(interaction_scores_adjust_missing, aes(l_rank, avg_zscore_l)) + ggtitle("Average Z score vs. Rank for L above 2sd") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_l >= 2, ])[1])) + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_l <= -2, ])[1])) + theme_publication() rank_l_3sd <- ggplot(interaction_scores_adjust_missing, aes(l_rank, avg_zscore_l)) + ggtitle("Average Z score vs. Rank for L above 3sd") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_l >= 3, ])[1])) + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_l <= -3, ])[1])) + theme_publication() rank_k_1sd <- ggplot(interaction_scores_adjust_missing, aes(k_rank, avg_zscore_k)) + ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_k <= -1, ])[1])) + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_k >= 1, ])[1])) + theme_publication() rank_k_2sd <- ggplot(interaction_scores_adjust_missing, aes(k_rank, avg_zscore_k)) + ggtitle("Average Z score vs. Rank for K above 2sd") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_k <= -2, ])[1])) + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_k >= 2, ])[1])) + theme_publication() rank_k_3sd <- ggplot(interaction_scores_adjust_missing, aes(k_rank, avg_zscore_k)) + ggtitle("Average Z score vs. Rank for K above 3sd") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_k <= -3, ])[1])) + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_k >= 3, ])[1])) + theme_publication() rank_l_1sd_notext <- ggplot(interaction_scores_adjust_missing, aes(l_rank, avg_zscore_l)) + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + theme_publication() rank_l_2sd_notext <- ggplot(interaction_scores_adjust_missing, aes(l_rank, avg_zscore_l)) + ggtitle("Average Z score vs. Rank for L above 2sd") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + theme_publication() rank_l_3sd_notext <- ggplot(interaction_scores_adjust_missing, aes(l_rank, avg_zscore_l)) + ggtitle("Average Z score vs. Rank for L above 3sd") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + theme_publication() rank_k_1sd_notext <- ggplot(interaction_scores_adjust_missing, aes(k_rank, avg_zscore_k)) + ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + theme_publication() rank_k_2sd_notext <- ggplot(interaction_scores_adjust_missing, aes(k_rank, avg_zscore_k)) + ggtitle("Average Z score vs. Rank for K above 2sd") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + theme_publication() rank_k_3sd_notext <- ggplot(interaction_scores_adjust_missing, aes(k_rank, avg_zscore_k)) + ggtitle("Average Z score vs. Rank for K above 3sd") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + theme_publication() pdf(file.path(out_dir, "rank_plots.pdf"), width = 18, height = 12, onefile = TRUE) grid.arrange( rank_l_1sd, rank_l_2sd, rank_l_3sd, rank_k_1sd, rank_k_2sd, rank_k_3sd, ncol = 3, nrow = 2 ) grid.arrange( rank_l_1sd_notext, rank_l_2sd_notext, rank_l_3sd_notext, rank_k_1sd_notext, rank_k_2sd_notext, rank_k_3sd_notext, ncol = 3, nrow = 2 ) dev.off() rank_l_1sd_lm <- ggplot(interaction_scores_adjust_missing, aes(l_rank_lm, z_lm_l)) + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_l >= 1, ])[1])) + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_l <= -1, ])[1])) + theme_publication() rank_l_2sd_lm <- ggplot(interaction_scores_adjust_missing, aes(l_rank_lm, z_lm_l)) + ggtitle("Interaction Z score vs. Rank for L above 2sd") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_l >= 2, ])[1])) + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_l <= -2, ])[1])) + theme_publication() rank_l_3sd_lm <- ggplot(interaction_scores_adjust_missing, aes(l_rank_lm, z_lm_l)) + ggtitle("Interaction Z score vs. Rank for L above 3sd") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_l >= 3, ])[1])) + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_l <= -3, ])[1])) + theme_publication() rank_k_1sd_lm <- ggplot(interaction_scores_adjust_missing, aes(k_rank_lm, z_lm_k)) + ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_k <= -1, ])[1])) + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_k >= 1, ])[1])) + theme_publication() rank_k_2sd_lm <- ggplot(interaction_scores_adjust_missing, aes(k_rank_lm, z_lm_k)) + ggtitle("Interaction Z score vs. Rank for K above 2sd") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_k <= -2, ])[1])) + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_k >= 2, ])[1])) + theme_publication() rank_k_3sd_lm <- ggplot(interaction_scores_adjust_missing, aes(k_rank_lm, z_lm_k)) + ggtitle("Interaction Z score vs. Rank for K above 3sd") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_k <= -3, ])[1])) + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_k >= 3, ])[1])) + theme_publication() rank_l_1sd_notext_lm <- ggplot(interaction_scores_adjust_missing, aes(l_rank_lm, z_lm_l)) + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + theme_publication() rank_l_2sd_notext_lm <- ggplot(interaction_scores_adjust_missing, aes(l_rank_lm, z_lm_l)) + ggtitle("Interaction Z score vs. Rank for L above 2sd") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + theme_publication() rank_l_3sd_notext_lm <- ggplot(interaction_scores_adjust_missing, aes(l_rank_lm, z_lm_l)) + ggtitle("Interaction Z score vs. Rank for L above 3sd") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + theme_publication() rank_k_1sd_notext_lm <- ggplot(interaction_scores_adjust_missing, aes(k_rank_lm, z_lm_k)) + ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + theme_publication() rank_k_2sd_notext_lm <- ggplot(interaction_scores_adjust_missing, aes(k_rank_lm, z_lm_k)) + ggtitle("Interaction Z score vs. Rank for K above 2sd") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + theme_publication() rank_k_3sd_notext_lm <- ggplot(interaction_scores_adjust_missing, aes(k_rank_lm, z_lm_k)) + ggtitle("Interaction Z score vs. Rank for K above 3sd") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + theme_publication() pdf(file.path(out_dir, "rank_plots_lm.pdf"), width = 18, height = 12, onefile = TRUE) grid.arrange( rank_l_1sd_lm, rank_l_2sd_lm, rank_l_3sd_lm, rank_k_1sd_lm, rank_k_2sd_lm, rank_k_3sd_lm, ncol = 3, nrow = 2 ) grid.arrange( rank_l_1sd_notext_lm, rank_l_2sd_notext_lm, rank_l_3sd_notext_lm, rank_k_1sd_notext_lm, rank_k_2sd_notext_lm, rank_k_3sd_notext_lm, ncol = 3, nrow = 2 ) dev.off() df_na_rm <- interaction_scores[!is.na(interaction_scores$z_lm_l) | !is.na(interaction_scores$avg_zscore_l), ] # Find overlaps df_na_rm$Overlap <- "No Effect" try(df_na_rm[df_na_rm$z_lm_l >= 2 & df_na_rm$avg_zscore_l >= 2, ]$Overlap <- "Deletion Enhancer Both") try(df_na_rm[df_na_rm$z_lm_l <= -2 & df_na_rm$avg_zscore_l <= -2, ]$Overlap <- "Deletion Suppressor Both") try(df_na_rm[df_na_rm$z_lm_l >= 2 & df_na_rm$avg_zscore_l <= 2, ]$Overlap <- "Deletion Enhancer lm only") try(df_na_rm[df_na_rm$z_lm_l <= 2 & df_na_rm$avg_zscore_l >= 2, ]$Overlap <- "Deletion Enhancer Avg Zscore only") try(df_na_rm[df_na_rm$z_lm_l <= -2 & df_na_rm$avg_zscore_l >= -2, ]$Overlap <- "Deletion Suppressor lm only") try(df_na_rm[df_na_rm$z_lm_l >= -2 & df_na_rm$avg_zscore_l <= -2, ]$Overlap <- "Deletion Suppressor Avg Zscore only") try(df_na_rm[df_na_rm$z_lm_l >= 2 & df_na_rm$avg_zscore_l <= -2, ]$Overlap <- "Deletion Enhancer lm, Deletion Suppressor Avg Z score") try(df_na_rm[df_na_rm$z_lm_l <= -2 & df_na_rm$avg_zscore_l >= 2, ]$Overlap <- "Deletion Suppressor lm, Deletion Enhancer Avg Z score") # Get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 get_lm_l <- lm(df_na_rm$z_lm_l ~ df_na_rm$avg_zscore_l) l_lm <- summary(get_lm_l) get_lm_k <- lm(df_na_rm$z_lm_k ~ df_na_rm$avg_zscore_k) k_lm <- summary(get_lm_k) get_lm_r <- lm(df_na_rm$z_lm_r ~ df_na_rm$avg_zscore_r) r_lm <- summary(get_lm_r) get_lm_auc <- lm(df_na_rm$z_lm_auc ~ df_na_rm$avg_zscore_auc) auc_lm <- summary(get_lm_auc) pdf(file.path(out_dir, "avg_zscore_vs_lm_na_rm.pdf"), width = 16, height = 12, onefile = TRUE) print(ggplot(df_na_rm, aes(avg_zscore_l, z_lm_l)) + geom_point(aes(color = Overlap), shape = 3) + geom_smooth(method = "lm", color = 1) + ggtitle("Avg Zscore vs lm L") + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm$r.squared, 2))) + theme_publication_legend_right()) print(ggplot(df_na_rm, aes(avg_zscore_k, z_lm_k)) + geom_point(aes(color = Overlap), shape = 3) + geom_smooth(method = "lm", color = 1) + ggtitle("Avg Zscore vs lm K") + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(k_lm$r.squared, 2))) + theme_publication_legend_right()) print(ggplot(df_na_rm, aes(avg_zscore_r, z_lm_r)) + geom_point(aes(color = Overlap), shape = 3) + geom_smooth(method = "lm", color = 1) + ggtitle("Avg Zscore vs lm r") + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(r_lm$r.squared, 2))) + theme_publication_legend_right()) print(ggplot(df_na_rm, aes(avg_zscore_auc, z_lm_auc)) + geom_point(aes(color = Overlap), shape = 3) + geom_smooth(method = "lm", color = 1) + ggtitle("Avg Zscore vs lm AUC") + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(auc_lm$r.squared, 2))) + theme_publication_legend_right()) dev.off() lm_v_zscore_l <- ggplot(df_na_rm, aes(avg_zscore_l, z_lm_l, ORF = OrfRep, Gene = Gene, NG = NG, SM = SM, DB = DB)) + geom_point(aes(color = Overlap), shape = 3) + geom_smooth(method = "lm", color = 1) + ggtitle("Avg Zscore vs lm L") + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm$r.squared, 2))) + theme_publication_legend_right() pgg <- ggplotly(lm_v_zscore_l) plotly_path <- file.path(out_dir, "avg_zscore_vs_lm_na_rm.html") saveWidget(pgg, file = plotly_path, selfcontained = TRUE) df_na_rm$l_rank <- rank(df_na_rm$avg_zscore_l) df_na_rm$k_rank <- rank(df_na_rm$avg_zscore_k) df_na_rm$r_rank <- rank(df_na_rm$avg_zscore_r) df_na_rm$auc_rank <- rank(df_na_rm$avg_zscore_auc) df_na_rm$l_rank_lm <- rank(df_na_rm$z_lm_l) df_na_rm$k_rank_lm <- rank(df_na_rm$z_lm_k) df_na_rm$r_rank_lm <- rank(df_na_rm$z_lm_r) df_na_rm$auc_rank_lm <- rank(df_na_rm$z_lm_auc) # Get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 get_lm_l2 <- lm(df_na_rm$l_rank_lm ~ df_na_rm$l_rank) l_lm2 <- summary(get_lm_l2) get_lm_k2 <- lm(df_na_rm$k_rank_lm ~ df_na_rm$k_rank) k_lm2 <- summary(get_lm_k2) get_lm_r2 <- lm(df_na_rm$r_rank_lm ~ df_na_rm$r_rank) r_lm2 <- summary(get_lm_r2) get_lm_auc2 <- lm(df_na_rm$auc_rank_lm ~ df_na_rm$auc_rank) auc_lm2 <- summary(get_lm_auc2) num_genes_na_rm2 <- (dim(df_na_rm)[1]) / 2 pdf(file.path(out_dir, "avg_zscore_vs_lm_ranked_na_rm.pdf"), width = 16, height = 12, onefile = TRUE) print( ggplot(df_na_rm, aes(l_rank, l_rank_lm)) + geom_point(aes(color = Overlap), shape = 3) + geom_smooth(method = "lm", color = 1) + ggtitle("Rank Avg Zscore vs lm L") + annotate("text", x = num_genes_na_rm2, y = num_genes_na_rm2, label = paste("R-squared = ", round(l_lm2$r.squared, 2))) + theme_publication_legend_right() ) print( ggplot(df_na_rm, aes(k_rank, k_rank_lm)) + geom_point(aes(color = Overlap), shape = 3) + geom_smooth(method = "lm", color = 1) + ggtitle("Rank Avg Zscore vs lm K") + annotate("text", x = num_genes_na_rm2, y = num_genes_na_rm2, label = paste("R-squared = ", round(k_lm2$r.squared, 2))) + theme_publication_legend_right() ) print( ggplot(df_na_rm, aes(r_rank, r_rank_lm)) + geom_point(aes(color = Overlap), shape = 3) + geom_smooth(method = "lm", color = 1) + ggtitle("Rank Avg Zscore vs lm r") + annotate("text", x = num_genes_na_rm2, y = num_genes_na_rm2, label = paste("R-squared = ", round(r_lm2$r.squared, 2))) + theme_publication_legend_right() ) print( ggplot(df_na_rm, aes(auc_rank, auc_rank_lm)) + geom_point(aes(color = Overlap), shape = 3) + geom_smooth(method = "lm", color = 1) + ggtitle("Rank of Avg Zscore vs lm AUC") + annotate("text", x = num_genes_na_rm2, y = num_genes_na_rm2, label = paste("R-squared = ", round(auc_lm2$r.squared, 2))) + theme_publication_legend_right() ) dev.off() rank_l_1sd <- ggplot(df_na_rm, aes(l_rank, avg_zscore_l)) + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$avg_zscore_l >= 1, ])[1])) + annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$avg_zscore_l <= -1, ])[1])) + theme_publication() rank_l_2sd <- ggplot(df_na_rm, aes(l_rank, avg_zscore_l)) + ggtitle("Average Z score vs. Rank for L above 2sd") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$avg_zscore_l >= 2, ])[1])) + annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$avg_zscore_l <= -2, ])[1])) + theme_publication() rank_l_3sd <- ggplot(df_na_rm, aes(l_rank, avg_zscore_l)) + ggtitle("Average Z score vs. Rank for L above 3sd") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$avg_zscore_l >= 3, ])[1])) + annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$avg_zscore_l <= -3, ])[1])) + theme_publication() rank_k_1sd <- ggplot(df_na_rm, aes(k_rank, avg_zscore_k)) + ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$avg_zscore_k <= -1, ])[1])) + annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$avg_zscore_k >= 1, ])[1])) + theme_publication() rank_k_2sd <- ggplot(df_na_rm, aes(k_rank, avg_zscore_k)) + ggtitle("Average Z score vs. Rank for K above 2sd") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$avg_zscore_k <= -2, ])[1])) + annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$avg_zscore_k >= 2, ])[1])) + theme_publication() rank_k_3sd <- ggplot(df_na_rm, aes(k_rank, avg_zscore_k)) + ggtitle("Average Z score vs. Rank for K above 3sd") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$avg_zscore_k <= -3, ])[1])) + annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$avg_zscore_k >= 3, ])[1])) + theme_publication() rank_l_1sd_notext <- ggplot(df_na_rm, aes(l_rank, avg_zscore_l)) + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + theme_publication() rank_l_2sd_notext <- ggplot(df_na_rm, aes(l_rank, avg_zscore_l)) + ggtitle("Average Z score vs. Rank for L above 2sd") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + theme_publication() rank_l_3sd_notext <- ggplot(df_na_rm, aes(l_rank, avg_zscore_l)) + ggtitle("Average Z score vs. Rank for L above 3sd") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + theme_publication() rank_k_1sd_notext <- ggplot(df_na_rm, aes(k_rank, avg_zscore_k)) + ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + theme_publication() rank_k_2sd_notext <- ggplot(df_na_rm, aes(k_rank, avg_zscore_k)) + ggtitle("Average Z score vs. Rank for K above 2sd") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + theme_publication() rank_k_3sd_notext <- ggplot(df_na_rm, aes(k_rank, avg_zscore_k)) + ggtitle("Average Z score vs. Rank for K above 3sd") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + theme_publication() pdf(file.path(out_dir, "rank_plots_narm.pdf"), width = 18, height = 12, onefile = TRUE) grid.arrange( rank_l_1sd, rank_l_2sd, rank_l_3sd, rank_k_1sd, rank_k_2sd, rank_k_3sd, ncol = 3, nrow = 2 ) grid.arrange( rank_l_1sd_notext, rank_l_2sd_notext, rank_l_3sd_notext, rank_k_1sd_notext, rank_k_2sd_notext, rank_k_3sd_notext, ncol = 3, nrow = 2 ) dev.off() rank_l_1sd_lm <- ggplot(df_na_rm, aes(l_rank_lm, z_lm_l)) + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$z_lm_l >= 1, ])[1])) + annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$z_lm_l <= -1, ])[1])) + theme_publication() rank_l_2sd_lm <- ggplot(df_na_rm, aes(l_rank_lm, z_lm_l)) + ggtitle("Interaction Z score vs. Rank for L above 2sd") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$z_lm_l >= 2, ])[1])) + annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$z_lm_l <= -2, ])[1])) + theme_publication() rank_l_3sd_lm <- ggplot(df_na_rm, aes(l_rank_lm, z_lm_l)) + ggtitle("Interaction Z score vs. Rank for L above 3sd") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$z_lm_l >= 3, ])[1])) + annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$z_lm_l <= -3, ])[1])) + theme_publication() rank_k_1sd_lm <- ggplot(df_na_rm, aes(k_rank_lm, z_lm_k)) + ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$z_lm_k <= -1, ])[1])) + annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$z_lm_k >= 1, ])[1])) + theme_publication() rank_k_2sd_lm <- ggplot(df_na_rm, aes(k_rank_lm, z_lm_k)) + ggtitle("Interaction Z score vs. Rank for K above 2sd") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$z_lm_k <= -2, ])[1])) + annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$z_lm_k >= 2, ])[1])) + theme_publication() rank_k_3sd_lm <- ggplot(df_na_rm, aes(k_rank_lm, z_lm_k)) + ggtitle("Interaction Z score vs. Rank for K above 3sd") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$z_lm_k <= -3, ])[1])) + annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$z_lm_k >= 3, ])[1])) + theme_publication() rank_l_1sd_notext_lm <- ggplot(df_na_rm, aes(l_rank_lm, z_lm_l)) + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + theme_publication() rank_l_2sd_notext_lm <- ggplot(df_na_rm, aes(l_rank_lm, z_lm_l)) + ggtitle("Interaction Z score vs. Rank for L above 2sd") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + theme_publication() rank_l_3sd_notext_lm <- ggplot(df_na_rm, aes(l_rank_lm, z_lm_l)) + ggtitle("Interaction Z score vs. Rank for L above 3sd") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + theme_publication() rank_k_1sd_notext_lm <- ggplot(df_na_rm, aes(k_rank_lm, z_lm_k)) + ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + theme_publication() rank_k_2sd_notext_lm <- ggplot(df_na_rm, aes(k_rank_lm, z_lm_k)) + ggtitle("Interaction Z score vs. Rank for K above 2sd") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + theme_publication() rank_k_3sd_notext_lm <- ggplot(df_na_rm, aes(k_rank_lm, z_lm_k)) + ggtitle("Interaction Z score vs. Rank for K above 3sd") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + theme_publication() pdf(file.path(out_dir, "rank_plots_lm_narm.pdf"), width = 18, height = 12, onefile = TRUE) grid.arrange( rank_l_1sd_lm, rank_l_2sd_lm, rank_l_3sd_lm, rank_k_1sd_lm, rank_k_2sd_lm, rank_k_3sd_lm, ncol = 3, nrow = 2 ) grid.arrange( rank_l_1sd_notext_lm, rank_l_2sd_notext_lm, rank_l_3sd_notext_lm, rank_k_1sd_notext_lm, rank_k_2sd_notext_lm, rank_k_3sd_notext_lm, ncol = 3, nrow = 2 ) dev.off() } # Get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 get_lm_1 <- lm(df_na_rm$z_lm_k ~ df_na_rm$z_lm_l) l_lm_1 <- summary(get_lm_1) get_lm_2 <- lm(df_na_rm$z_lm_r ~ df_na_rm$z_lm_l) l_lm_2 <- summary(get_lm_2) get_lm_3 <- lm(df_na_rm$z_lm_auc ~ df_na_rm$z_lm_l) l_lm_3 <- summary(get_lm_3) get_lm_4 <- lm(df_na_rm$z_lm_r ~ df_na_rm$z_lm_k) l_lm_4 <- summary(get_lm_4) get_lm_5 <- lm(df_na_rm$z_lm_auc ~ df_na_rm$z_lm_k) l_lm_5 <- summary(get_lm_5) get_lm_6 <- lm(df_na_rm$z_lm_auc ~ df_na_rm$z_lm_r) l_lm_6 <- summary(get_lm_6) pdf(file.path(out_dir, "correlation_cpps.pdf"), width = 10, height = 7, onefile = TRUE) ggplot(df_na_rm, aes(z_lm_l, z_lm_k)) + geom_point(shape = 3, color = "gray70") + geom_smooth(method = "lm", color = "tomato3") + ggtitle("Interaction L vs. Interaction K") + xlab("z-score L") + ylab("z-score K") + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_1$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18) ) ggplot(df_na_rm, aes(z_lm_l, z_lm_r)) + geom_point(shape = 3, color = "gray70") + geom_smooth(method = "lm", color = "tomato3") + ggtitle("Interaction L vs. Interaction r") + xlab("z-score L") + ylab("z-score r") + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_2$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18) ) ggplot(df_na_rm, aes(z_lm_l, z_lm_auc)) + geom_point(shape = 3, color = "gray70") + geom_smooth(method = "lm", color = "tomato3") + ggtitle("Interaction L vs. Interaction AUC") + xlab("z-score L") + ylab("z-score AUC") + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_3$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18) ) ggplot(df_na_rm, aes(z_lm_k, z_lm_r)) + geom_point(shape = 3, color = "gray70") + geom_smooth(method = "lm", color = "tomato3") + ggtitle("Interaction K vs. Interaction r") + xlab("z-score K") + ylab("z-score r") + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_4$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18) ) ggplot(df_na_rm, aes(z_lm_k, z_lm_auc)) + geom_point(shape = 3, color = "gray70") + geom_smooth(method = "lm", color = "tomato3") + ggtitle("Interaction K vs. Interaction AUC") + xlab("z-score K") + ylab("z-score AUC") + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_5$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18) ) ggplot(df_na_rm, aes(z_lm_r, z_lm_auc)) + geom_point(shape = 3, color = "gray70") + geom_smooth(method = "lm", color = "tomato3") + ggtitle("Interaction r vs. Interaction AUC") + xlab("z-score r") + ylab("z-score AUC") + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_6$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)) interaction_scores_rf2 <- interaction_scores_rf[!is.na(interaction_scores_rf$z_lm_l), ] ggplot(df_na_rm, aes(z_lm_l, z_lm_k)) + geom_point(shape = 3, color = "gray70") + geom_point(data = interaction_scores_rf2, aes(z_lm_l, z_lm_k), color = "cyan") + ggtitle("Interaction L vs. Interaction K") + xlab("z-score L") + ylab("z-score K") + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_1$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18) ) ggplot(df_na_rm, aes(z_lm_l, z_lm_r)) + geom_point(shape = 3, color = "gray70") + geom_point(data = interaction_scores_rf2, aes(z_lm_l, z_lm_r), color = "cyan") + ggtitle("Interaction L vs. Interaction r") + xlab("z-score L") + ylab("z-score r") + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_2$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18) ) ggplot(df_na_rm, aes(z_lm_l, z_lm_auc)) + geom_point(shape = 3, color = "gray70") + geom_point(data = interaction_scores_rf2, aes(z_lm_l, z_lm_auc), color = "cyan") + ggtitle("Interaction L vs. Interaction AUC") + xlab("z-score L") + ylab("z-score AUC") + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_3$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18) ) ggplot(df_na_rm, aes(z_lm_k, z_lm_r)) + geom_point(shape = 3, color = "gray70") + geom_point(data = interaction_scores_rf2, aes(z_lm_k, z_lm_r), color = "cyan") + ggtitle("Interaction K vs. Interaction r") + xlab("z-score K") + ylab("z-score r") + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_4$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18) ) ggplot(df_na_rm, aes(z_lm_k, z_lm_auc)) + geom_point(shape = 3, color = "gray70") + geom_point(data = interaction_scores_rf2, aes(z_lm_k, z_lm_auc), color = "cyan") + ggtitle("Interaction K vs. Interaction AUC") + xlab("z-score K") + ylab("z-score AUC") + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_5$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18) ) ggplot(df_na_rm, aes(z_lm_r, z_lm_auc)) + geom_point(shape = 3, color = "gray70") + geom_point(data = interaction_scores_rf2, aes(z_lm_r, z_lm_auc), color = "cyan") + ggtitle("Interaction r vs. Interaction AUC") + xlab("z-score r") + ylab("z-score AUC") + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_6$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18) ) dev.off() # write.csv(Labels, file.path("../Code/Parameters.csv"), row.names = FALSE) timestamp()