diff --git a/workflow/.vscode/extensions.json b/workflow/.vscode/extensions.json new file mode 100644 index 00000000..7de8a62b --- /dev/null +++ b/workflow/.vscode/extensions.json @@ -0,0 +1,34 @@ +{ + // See https://go.microsoft.com/fwlink/?LinkId=827846 to learn about workspace recommendations. + // Extension identifier format: ${publisher}.${name}. Example: vscode.csharp + + // List of extensions which should be recommended for users of this workspace. +"recommendations": [ + "bierner.markdown-preview-github-styles", + "davidanson.vscode-markdownlint", + "dbaeumer.vscode-eslint", + "eamodio.gitlens", + "github.remotehub", + "github.vscode-pull-request-github", + "koppt.vscode-view-in-browser", + "mathworks.language-matlab", + "ms-python.debugpy", + "ms-python.python", + "ms-python.vscode-pylance", + "ms-vscode-remote.remote-ssh", + "ms-vscode.remote-explorer", + "ms-vscode.remote-repositories", + "redhat.java", + "redhat.vscode-yaml", + "reditorsupport.r", + "rogalmic.bash-debug", + "streetsidesoftware.code-spell-checker", + "timonwong.shellcheck", + "yzhang.markdown-all-in-one" +], + // List of extensions recommended by VS Code that should not be recommended for users of this workspace. + "unwantedRecommendations": [ + + ] +} + diff --git a/workflow/.vscode/launch.json b/workflow/.vscode/launch.json new file mode 100644 index 00000000..7c2d333b --- /dev/null +++ b/workflow/.vscode/launch.json @@ -0,0 +1,50 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "type": "R-Debugger", + "request": "attach", + "name": "Attach to R process", + "splitOverwrittenOutput": true + }, + { + "type": "R-Debugger", + "name": "Launch R-Workspace", + "request": "launch", + "debugMode": "workspace", + "workingDirectory": "${workspaceFolder}" + }, + { + "type": "R-Debugger", + "name": "Debug R-File", + "request": "launch", + "debugMode": "file", + "workingDirectory": "${fileFolder}", + "file": "${file}", + }, + { + "type": "R-Debugger", + "name": "Debug R-Function", + "request": "launch", + "debugMode": "function", + "workingDirectory": "${workspaceFolder}", + "file": "${file}", + "mainFunction": "main", + "allowGlobalDebugging": false + }, + { + "type": "R-Debugger", + "name": "Debug R-Package", + "request": "launch", + "debugMode": "workspace", + "workingDirectory": "${workspaceFolder}", + "includePackageScopes": true, + "loadPackages": [ + "." + ] + } + ] +} \ No newline at end of file diff --git a/workflow/apps/r/interactions.R b/workflow/apps/r/interactions.R index f13d85a7..3d396583 100644 --- a/workflow/apps/r/interactions.R +++ b/workflow/apps/r/interactions.R @@ -1,151 +1,53 @@ -#!/usr/bin/env Rscript +suppressMessages({ + library("ggplot2") + library("plotly") + library("htmlwidgets") + library("dplyr") + library("ggthemes") +}) -suppressMessages(library("ggplot2")) -suppressMessages(library("plyr")) -suppressMessages(library("extrafont")) -suppressMessages(library("gridExtra")) -suppressMessages(library("gplots")) -suppressMessages(library("RColorBrewer")) -suppressMessages(library("stringr")) -suppressMessages(library("gdata")) -suppressMessages(library("plotly")) -suppressMessages(library("htmlwidgets")) +# Constants for configuration +PLOT_WIDTH <- 14 +PLOT_HEIGHT <- 9 +BASE_SIZE <- 14 +# BACKUP_SUFFIX <- paste0(".backup_", Sys.Date()) -# Print extra debugging during development -options(error = quote({ - dump.frames(to.file = TRUE, dumpto = "last.dump") - load("last.dump.rda") - print(last.dump) - q() -})) +options(warn = 2, max.print = 100) -# Parse arguments -args <- commandArgs(TRUE) -exp_number <- as.numeric(args[1]) -delta_bg_factor <- as.numeric(args[2]) -study_info_file <- file.path(args[3]) -sgd_gene_list <- file.path(args[4]) -input_file <- file.path(args[5]) -out_dir <- file.path(args[6]) - -out_dir_qc <- file.path(out_dir, "qc") - -if (!dir.exists(out_dir)) { - dir.create(out_dir) -} - -if (!dir.exists(out_dir_qc)) { - dir.create(out_dir_qc) -} - -# Read in the data -df <- read.delim(input_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE) -df <- df[!(df[[1]] %in% c("", "Scan")), ] -# df <- df[!(df[[1]]%in%c(61:76)), ] # remove dAmp plates which are Scans 61 thru 76 -# df <- df[which(df$Specifics == "WT"), ] -# df_length <- length(df[1, ]) -# df_end <- length(df[1, ]) - 2 -# df <- df[, c(1:42, df_end:df_length)] - -# Use numbers -df$col <- as.numeric(df$Col) -df$row <- as.numeric(df$Row) -df$l <- as.numeric(df$l) -df$K <- as.numeric(df$K) -df$r <- as.numeric(df$r) -df$Scan <- as.numeric(df$Scan) -df$AUC <- as.numeric(df$AUC) -df$last_bg <- as.numeric(df$LstBackgrd) -df$first_bg <- as.numeric(df$X1stBackgrd) - -# Set delta background tolerance based on 3 sds from the mean delta background -df$delta_bg <- df$last_bg - df$first_bg -delta_bg_tolerance <- mean(df$delta_bg) + (delta_bg_factor * sd(df$delta_bg)) -# delta_bg_tolerance <- mean(df$delta_bg)+(3*sd(df$delta_bg)) -sprintf("The delta_bg_factor is: %d", delta_bg_factor) -sprintf("The delta_bg_tolerance is %f", delta_bg_tolerance) - -# Sometimes the non-varying drug is in the 'Drug' col vs the 'Modifier1' col -# as was the case in Gemcitabin and Cytarabin experiments. -# The following allows user to rename columns so as to get the appropriate -# data where it needs to be for the script to run properly. -# colnames(df)[7] <- "Modifier1" -# colnames(df)[8] <- "Conc1" -# colnames(df)[10] <- "Drug" -# colnames(df)[11] <- "Conc" - -# Set the OrfRep to YDL227C for the ref data -df[df$ORF == "YDL227C", ]$OrfRep <- "YDL227C" - -# Sean removes the Doxycyclin at 0.0ug.mL so that only the Oligomycin series with Doxycyclin of 0.12ug/mL are used. -# That is the first DM plates are removed from the data set with the following. -# This removes data with dox == 0 leaving gene expression on with four different concentrations of Gemcytabin -# df <- df[df$Conc1 != "0ug/ml", ] -df <- df[df$Drug != "BMH21", ] # this removes data concerning BMH21 for this experiment - -# Mert placed the "bad_spot" text in the ORF col. for particular spots in the RF1 and RF2 plates -# This code removes those spots from the data set used for the interaction analysis -# Dr. Hartman feels that these do not affect Zscores significantly and so "non-curated" files were used -# try(df <- df[df$ORF != "bad_spot", ]) - -# Get total number of drug concentrations -total_conc_nums <- length(unique(df$Conc)) - -# Function to ID numbers in string with characters+numbers (ie to get numeric drug conc) -numextract <- function(string) { - str_extract(string, "\\-*\\d+\\.*\\d*") -} - -# Generate a new column with the numeric drug concs -df$conc_num <- as.numeric(numextract(df$Conc)) - -# Generate new column with the numeric drug concs as factors starting at 0 for the graphing later -df$conc_num_factor <- as.numeric(as.factor(df$conc_num)) - 1 - -# Get the max factor for concentration -max_conc <- max(df$conc_num_factor) - -# If treating numbers not as factors uncomment next line and comment out previous line -# max_conc <- max(df$conc_num) - -# Remove wells with problems for making graphs and to not include in summary statistics -df <- df[df$Gene != "BLANK", ] -df <- df[df$Gene != "Blank", ] -df <- df[df$ORF != "Blank", ] -df <- df[df$Gene != "blank", ] -# df <- df[df$Gene != "HO", ] - -# Use sgd_gene_list to update orfs and replace empty geneName cells with ORF name -# This is to 'fix' the naming for everything that follows (REMc, Heatmaps ... et.al) rather than do it piece meal later -genes <- data.frame(read.delim( - file = sgd_gene_list, quote = "", header = FALSE, colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11)))) -for (i in 1:length(df[, 14])) { - ii <- as.numeric(i) - line_num <- match(df[ii, 14], genes[, 1], nomatch = 1) - orf_rep_col_num <- as.numeric(match("OrfRep", names(df))) - if (df[ii, orf_rep_col_num] != "YDL227C") { - df[ii, 15] <- genes[line_num, 2] - } - if ((df[ii, 15] == "") || (df[ii, 15] == "OCT1")) { - df[ii, 15] <- df[ii, orf_rep_col_num] +# Function to parse arguments +parse_arguments <- function() { + if (interactive()) { + args <- c( + "1", + "3", + "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/StudyInfo.csv", + "/home/bryan/documents/develop/scripts/hartmanlab/workflow/apps/r/SGD_features.tab", + "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/easy/20240116_jhartman2/results_std.txt", + "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/Exp1/zscores" + ) + } else { + args <- commandArgs(trailingOnly = TRUE) } + list( + exp_number = as.numeric(args[1]), + delta_bg_factor = as.numeric(args[2]), + study_info_file = normalizePath(file.path(args[3]), mustWork = FALSE), + sgd_gene_list = normalizePath(file.path(args[4]), mustWork = FALSE), + input_file = normalizePath(file.path(args[5]), mustWork = FALSE), + out_dir = normalizePath(file.path(args[6]), mustWork = FALSE), + out_dir_qc = normalizePath(file.path(args[6], "qc"), mustWork = FALSE) + ) } -# Remove dAmPs -# jlh confirmed to leave dAmps in so comment out this section -# DAmPs_list <- "../Code/22_0602_Remy_DAmPsList.txt" -# Damps <- read.delim(DAmPs_list, header = F) -# df <- df[!(df$ORF %in% Damps$V1), ] # fix this to Damps[, 1] -# dfafterDampsRM = df # backup for debugging +args <- parse_arguments() +dir.create(args$out_dir, showWarnings = FALSE) +dir.create(args$out_dir_qc, showWarnings = FALSE) -# Theme elements for plots -theme_publication <- function(base_size = 14, base_family = "sans") { - library(grid) - library(ggthemes) - (theme_foundation(base_size = base_size, base_family = base_family) + - theme(plot.title = element_text( - face = "bold", - size = rel(1.2), hjust = 0.5), +# Define themes and scales +theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") { + theme_foundation(base_size = base_size, base_family = base_family) + + theme( + plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5), text = element_text(), panel.background = element_rect(colour = NA), plot.background = element_rect(colour = NA), @@ -153,3681 +55,184 @@ theme_publication <- function(base_size = 14, base_family = "sans") { axis.title = element_text(face = "bold", size = rel(1)), axis.title.y = element_text(angle = 90, vjust = 2), axis.title.x = element_text(vjust = -0.2), - axis.text = element_text(), axis.line = element_line(colour = "black"), - axis.ticks = element_line(), panel.grid.major = element_line(colour = "#f0f0f0"), panel.grid.minor = element_blank(), legend.key = element_rect(colour = NA), legend.position = "bottom", legend.direction = "horizontal", - legend.key.size = unit(0.2, "cm"), - legend.spacing = unit(0, "cm"), - legend.title = element_text(face = "italic"), plot.margin = unit(c(10, 5, 5, 5), "mm"), strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"), strip.text = element_text(face = "bold") ) - ) } -theme_publication_legend_right <- function(base_size = 14, base_family = "sans") { - (theme_foundation(base_size = base_size, base_family = base_family) + - theme(plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5), - text = element_text(), - panel.background = element_rect(colour = NA), - plot.background = element_rect(colour = NA), - panel.border = element_rect(colour = NA), - axis.title = element_text(face = "bold", size = rel(1)), - axis.title.y = element_text(angle = 90, vjust = 2), - axis.title.x = element_text(vjust = -0.2), - axis.text = element_text(), - axis.line = element_line(colour = "black"), - axis.ticks = element_line(), - panel.grid.major = element_line(colour = "#f0f0f0"), - panel.grid.minor = element_blank(), - legend.key = element_rect(colour = NA), +theme_publication_legend_right <- function(base_size = BASE_SIZE, base_family = "sans") { + theme_publication(base_size, base_family) + + theme( legend.position = "right", legend.direction = "vertical", legend.key.size = unit(0.5, "cm"), legend.spacing = unit(0, "cm"), - legend.title = element_text(face = "italic"), - plot.margin = unit(c(10, 5, 5, 5), "mm"), - strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"), - strip.text = element_text(face = "bold") + legend.title = element_text(face = "italic") ) - ) } scale_fill_publication <- function(...) { - discrete_scale( - "fill", - "Publication", - manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", - "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), - ... - ) + discrete_scale("fill", "Publication", manual_pal(values = c( + "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", + "#a6cee3", "#fb9a99", "#984ea3", "#ffff33" + )), ...) } scale_colour_publication <- function(...) { - discrete_scale( - "colour", - "Publication", - manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", - "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), - ... - ) + discrete_scale("colour", "Publication", manual_pal(values = c( + "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", + "#a6cee3", "#fb9a99", "#984ea3", "#ffff33" + )), ...) } -# Quality control -# Print quality control graphs for each dataset before removing contaminated data -# and before adjusting missing data to max theoretical values - -# Plate analysis plots to identify plate anomalies -plate_analysis_l <- - ggplot(df, aes(Scan, l, color = as.factor(conc_num))) + - geom_point(shape = 3, size = 0.2) + - stat_summary( - fun = mean, - fun.min = function(x) mean(x) - sd(x), - fun.max = function(x) mean(x) + sd(x), - geom = "errorbar") + - stat_summary(fun = mean, geom = "point", size = 0.6) + - ggtitle("Plate analysis by Drug Conc for L before quality control") + - theme_publication() - -plate_analysis_k <- - ggplot(df, aes(Scan, K, color = as.factor(conc_num))) + - geom_point(shape = 3, size = 0.2) + - stat_summary( - fun = mean, - fun.min = function(x) mean(x) - sd(x), - fun.max = function(x) mean(x) + sd(x), - geom = "errorbar") + - stat_summary(fun = mean, geom = "point", size = 0.6) + - ggtitle("Plate analysis by Drug Conc for K before quality control") + - theme_publication() - -plate_analysis_r <- - ggplot(df, aes(Scan, r, color = as.factor(conc_num))) + - geom_point(shape = 3, size = 0.2) + - stat_summary( - fun = mean, - fun.min = function(x) mean(x) - sd(x), - fun.max = function(x) mean(x) + sd(x), - geom = "errorbar") + - stat_summary(fun = mean, geom = "point", size = 0.6) + - ggtitle("Plate analysis by Drug Conc for r before quality control") + - theme_publication() - -plate_analysis_auc <- - ggplot(df, aes(Scan, AUC, color = as.factor(conc_num))) + - geom_point(shape = 3, size = 0.2) + - stat_summary( - fun = mean, - fun.min = function(x) mean(x) - sd(x), - fun.max = function(x) mean(x) + sd(x), - geom = "errorbar") + - stat_summary(fun = mean, geom = "point", size = 0.6) + - ggtitle("Plate analysis by Drug Conc for AUC before quality control") + - theme_publication() - -plate_analysis_l_box <- - ggplot(df, aes(as.factor(Scan), l, color = as.factor(conc_num))) + - geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for L before quality control") + - theme_publication() - -plate_analysis_k_box <- - ggplot(df, aes(as.factor(Scan), K, color = as.factor(conc_num))) + - geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for K before quality control") + - theme_publication() - -plate_analysis_r_box <- - ggplot(df, aes(as.factor(Scan), r, color = as.factor(conc_num))) + - geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for r before quality control") + - theme_publication() - -plate_analysis_auc_box <- - ggplot(df, aes(as.factor(Scan), AUC, color = as.factor(conc_num))) + - geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC before quality control") + - theme_publication() - -# Report L and K values with a high delta background (likely contaminated) -# Report the number to be removed based on the delta_bg_tolerance - -# Raw l vs k before QC -raw_l_vs_k_before_qc <- - ggplot(df, aes(l, K, color = as.factor(conc_num))) + - geom_point(aes(ORF = ORF, Gene = Gene, delta_bg = delta_bg), shape = 3) + - ggtitle("Raw L vs K before QC") + - theme_publication_legend_right() - -pdf(file.path(out_dir_qc, "raw_l_vs_k_before_qc.pdf"), width = 12, height = 8) -raw_l_vs_k_before_qc - -invisible(dev.off()) - -pgg <- ggplotly(raw_l_vs_k_before_qc) -plotly_path <- file.path(out_dir_qc, "raw_l_vs_k_before_qc.html") -saveWidget(pgg, file = plotly_path, selfcontained = TRUE) - -plate_analysis_delta_bg <- - ggplot(df, aes(Scan, delta_bg, color = as.factor(conc_num))) + - geom_point(shape = 3, size = 0.2, position = "jitter") + - stat_summary( - fun = mean, - fun.min = function(x) mean(x) - sd(x), - fun.max = function(x) mean(x) + sd(x), - geom = "errorbar") + - stat_summary(fun = mean, geom = "point", size = 0.6) + - ggtitle("Plate analysis by Drug Conc for delta_bg before quality control") + - theme_publication() - -plate_analysis_delta_bg_box <- - ggplot(df, aes(as.factor(Scan), delta_bg, color = as.factor(conc_num))) + - geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for delta_bg before quality control") + - theme_publication() - -x_delta_bg_above_tolerance <- - df[df$delta_bg >= delta_bg_tolerance, ] -x_delta_bg_above_tolerance_k_halfmedian <- - (median(x_delta_bg_above_tolerance$K, na.rm = TRUE)) / 2 -x_delta_bg_above_tolerance_l_halfmedian <- - (median(x_delta_bg_above_tolerance$l, na.rm = TRUE)) / 2 -x_delta_bg_above_tolerance_to_remove <- - dim(x_delta_bg_above_tolerance)[1] - -x_delta_bg_above_tolerance_l_vs_k <- - ggplot(x_delta_bg_above_tolerance, aes(l, K, color = as.factor(conc_num))) + - geom_point(aes(ORF = ORF, Gene = Gene, delta_bg = delta_bg), shape = 3) + - ggtitle(paste("Raw L vs K for strains above delta background threshold of", delta_bg_tolerance, "or above")) + - annotate("text", - x = x_delta_bg_above_tolerance_l_halfmedian, - y = x_delta_bg_above_tolerance_k_halfmedian, - label = paste("Strains above delta background tolerance = ", x_delta_bg_above_tolerance_to_remove)) + - theme_publication_legend_right() - -pdf(file.path(out_dir_qc, "raw_l_vs_k_for_strains_above_delta_background_threshold.pdf"), width = 12, height = 8) -x_delta_bg_above_tolerance_l_vs_k - -invisible(dev.off()) - -pgg <- ggplotly(x_delta_bg_above_tolerance_l_vs_k) -plotly_path <- file.path(out_dir_qc, "raw_l_vs_k_for_strains_above_delta_background_threshold.html") -saveWidget(pgg, file = plotly_path, selfcontained = TRUE) - -# Frequency plot for all data vs. the delta_background -delta_bg_frequency_plot <- - ggplot(df, aes(delta_bg, color = as.factor(conc_num))) + - geom_density() + - ggtitle("Density plot for Delta Background by Conc All Data") + - theme_publication_legend_right() - -# Bar plot for all data vs. the delta_background -delta_bg_bar_plot <- - ggplot(df, aes(delta_bg, color = as.factor(conc_num))) + - geom_bar() + - ggtitle("Bar plot for Delta Background by Conc All Data") + - theme_publication_legend_right() - -pdf(file.path(out_dir_qc, "frequency_delta_background.pdf"), width = 12, height = 8) -# print(delta_bg_frequency_plot) -# print(delta_bg_bar_plot) -invisible(dev.off()) - -# Need to identify missing data, and differentiate between this data and removed data -# so the removed data can get set to NA and the missing data can get set to max theoretical values -# 1 for missing data, 0 for non missing data -# Use "NG" for NoGrowth rather than "missing" -df$NG <- 0 -try(df[df$l == 0 & !is.na(df$l), ]$NG <- 1) - -# 1 for removed data, 0 non removed data -# Use DB to identify number of genes removed due to the DeltaBackground Threshold rather than "Removed" -df$DB <- 0 -try(df[df$delta_bg >= delta_bg_tolerance, ]$DB <- 1) - -# Replace the CPPs for l, r, AUC and K (must be last!) for removed data -try(df[df$delta_bg >= delta_bg_tolerance, ]$l <- NA) -try(df[df$delta_bg >= delta_bg_tolerance, ]$r <- NA) -try(df[df$delta_bg >= delta_bg_tolerance, ]$AUC <- NA) -try(df[df$delta_bg >= delta_bg_tolerance, ]$K <- NA) - -# QC Plots -plate_analysis_l_after_qc <- - ggplot(df, aes(Scan, l, color = as.factor(conc_num))) + - geom_point(shape = 3, size = 0.2) + - stat_summary( - fun = mean, - fun.min = function(x) mean(x) - sd(x), - fun.max = function(x) mean(x) + sd(x), - geom = "errorbar") + - stat_summary( - fun = mean, - geom = "point", - size = 0.6) + - ggtitle("Plate analysis by Drug Conc for L after quality control") + - theme_publication() - -plate_analysis_k_after_qc <- - ggplot(df, aes(Scan, K, color = as.factor(conc_num))) + - geom_point(shape = 3, size = 0.2) + - stat_summary( - fun = mean, - fun.min = function(x) mean(x) - sd(x), - fun.max = function(x) mean(x) + sd(x), - geom = "errorbar") + - stat_summary( - fun = mean, - geom = "point", - size = 0.6) + - ggtitle("Plate analysis by Drug Conc for k after quality control") + - theme_publication() - -plate_analysis_r_after_qc <- - ggplot(df, aes(Scan, r, color = as.factor(conc_num))) + - geom_point(shape = 3, size = 0.2) + - stat_summary( - fun = mean, - fun.min = function(x) mean(x) - sd(x), - fun.max = function(x) mean(x) + sd(x), - geom = "errorbar") + - stat_summary( - fun = mean, - geom = "point", - size = 0.6) + - ggtitle("Plate analysis by Drug Conc for r after quality control") + - theme_publication() - -plate_analysis_auc_after_qc <- - ggplot(df, aes(Scan, AUC, color = as.factor(conc_num))) + - geom_point(shape = 3, size = 0.2) + - stat_summary( - fun = mean, - fun.min = function(x) mean(x) - sd(x), - fun.max = function(x) mean(x) + sd(x), - geom = "errorbar") + - stat_summary( - fun = mean, - geom = "point", - size = 0.6) + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + - theme_publication() - -plate_analysis_delta_bg_after_qc <- - ggplot(df, aes(Scan, delta_bg, color = as.factor(conc_num))) + - geom_point(shape = 3, size = 0.2) + - stat_summary( - fun = mean, - fun.min = function(x) mean(x) - sd(x), - fun.max = function(x) mean(x) + sd(x), - geom = "errorbar") + - stat_summary( - fun = mean, - geom = "point", - size = 0.6) + - ggtitle("Plate analysis by Drug Conc for delta_bg after quality control") + - theme_publication() - -plate_analysis_l_box_after_qc <- - ggplot(df, aes(as.factor(Scan), l, color = as.factor(conc_num))) + - geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for L after quality control") + - theme_publication() - -plate_analysis_k_box_after_qc <- - ggplot(df, aes(as.factor(Scan), K, color = as.factor(conc_num))) + - geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for K after quality control") + - theme_publication() - -plate_analysis_r_box_after_qc <- - ggplot(df, aes(as.factor(Scan), r, color = as.factor(conc_num))) + - geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for r after quality control") + - theme_publication() - -plate_analysis_auc_box_after_qc <- - ggplot(df, aes(as.factor(Scan), AUC, color = as.factor(conc_num))) + - geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + - theme_publication() - -plate_analysis_delta_bg_box_after_qc <- - ggplot(df, aes(as.factor(Scan), delta_bg, color = as.factor(conc_num))) + - geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for delta_bg after quality control") + - theme_publication() - -# Print the plate analysis data before and after QC -pdf(file.path(out_dir_qc, "plate_analysis.pdf"), width = 14, height = 9) -plate_analysis_l -plate_analysis_l_after_qc -plate_analysis_k -plate_analysis_k_after_qc -plate_analysis_r -plate_analysis_r_after_qc -plate_analysis_auc -plate_analysis_auc_after_qc -plate_analysis_delta_bg -plate_analysis_delta_bg_after_qc -invisible(dev.off()) - -# Print the plate analysis data before and after QC -pdf(file.path(out_dir_qc, "plate_analysis_boxplots.pdf"), width = 18, height = 9) -plate_analysis_l_box -plate_analysis_l_box_after_qc -plate_analysis_k_box -plate_analysis_k_box_after_qc -plate_analysis_r_box -plate_analysis_r_box_after_qc -plate_analysis_auc_box -plate_analysis_auc_box_after_qc -plate_analysis_delta_bg_box -plate_analysis_delta_bg_box_after_qc -invisible(dev.off()) - -# Remove the zero values and print plate analysis -x_no_zero <- df[which(df$l > 0), ] -plate_analysis_l_after_qc_z <- - ggplot(x_no_zero, aes(Scan, l, color = as.factor(conc_num))) + - geom_point(shape = 3, size = 0.2) + - stat_summary( - fun = mean, - fun.min = function(x) mean(x) - sd(x), - fun.max = function(x) mean(x) + sd(x), - geom = "errorbar") + - stat_summary( - fun = mean, - geom = "point", - size = 0.6) + - ggtitle("Plate analysis by Drug Conc for L after quality control") + - theme_publication() - -plate_analysis_k_after_qc_z <- - ggplot(x_no_zero, aes(Scan, K, color = as.factor(conc_num))) + - geom_point(shape = 3, size = 0.2) + - stat_summary( - fun = mean, - fun.min = function(x) mean(x) - sd(x), - fun.max = function(x) mean(x) + sd(x), - geom = "errorbar") + - stat_summary( - fun = mean, - geom = "point", - size = 0.6) + - ggtitle("Plate analysis by Drug Conc for K after quality control") + - theme_publication() - -plate_analysis_r_after_qc_z <- - ggplot(x_no_zero, aes(Scan, r, color = as.factor(conc_num))) + - geom_point(shape = 3, size = 0.2) + - stat_summary( - fun = mean, - fun.min = function(x) mean(x) - sd(x), - fun.max = function(x) mean(x) + sd(x), - geom = "errorbar") + - stat_summary( - fun = mean, - geom = "point", - size = 0.6) + - ggtitle("Plate analysis by Drug Conc for r after quality control") + - theme_publication() - -plate_analysis_auc_after_qc_z <- - ggplot(x_no_zero, aes(Scan, AUC, color = as.factor(conc_num))) + - geom_point(shape = 3, size = 0.2) + - stat_summary( - fun = mean, - fun.min = function(x) mean(x) - sd(x), - fun.max = function(x) mean(x) + sd(x), - geom = "errorbar") + - stat_summary( - fun = mean, - geom = "point", - size = 0.6) + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + - theme_publication() - -plate_analysis_delta_bg_after_qc_z <- - ggplot(x_no_zero, aes(Scan, delta_bg, color = as.factor(conc_num))) + - geom_point(shape = 3, size = 0.2) + - stat_summary( - fun = mean, - fun.min = function(x) mean(x) - sd(x), - fun.max = function(x) mean(x) + sd(x), - geom = "errorbar") + - stat_summary( - fun = mean, - geom = "point", - size = 0.6) + - ggtitle("Plate analysis by Drug Conc for delta_bg after quality control") + - theme_publication() - -plate_analysis_l_box_after_qc_z <- - ggplot(x_no_zero, aes(as.factor(Scan), l, color = as.factor(conc_num))) + - geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for L after quality control") + - theme_publication() - -plate_analysis_k_box_after_qc_z <- - ggplot(x_no_zero, aes(as.factor(Scan), K, color = as.factor(conc_num))) + - geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for K after quality control") + - theme_publication() - -plate_analysis_r_box_after_qc_z <- - ggplot(x_no_zero, aes(as.factor(Scan), r, color = as.factor(conc_num))) + - geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for r after quality control") + - theme_publication() - -plate_analysis_auc_box_after_qc_z <- - ggplot(x_no_zero, aes(as.factor(Scan), AUC, color = as.factor(conc_num))) + - geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + - theme_publication() - -plate_analysis_delta_bg_box_after_qc_z <- - ggplot(x_no_zero, aes(as.factor(Scan), delta_bg, color = as.factor(conc_num))) + - geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for delta_bg after quality control") + - theme_publication() - -# Print the plate analysis data before and after QC -pdf(file.path(out_dir_qc, "plate_analysis_no_zeros.pdf"), width = 14, height = 9) -plate_analysis_l_after_qc_z -plate_analysis_k_after_qc_z -plate_analysis_r_after_qc_z -plate_analysis_auc_after_qc_z -plate_analysis_delta_bg_after_qc_z -invisible(dev.off()) - -# Print the plate analysis data before and after QC -pdf(file.path(out_dir_qc, "plate_analysis_no_zeros_boxplots.pdf"), width = 18, height = 9) -plate_analysis_l_box_after_qc_z -plate_analysis_k_box_after_qc_z -plate_analysis_r_box_after_qc_z -plate_analysis_auc_box_after_qc_z -plate_analysis_delta_bg_box_after_qc_z -invisible(dev.off()) - -# Remove dataset with zeros removed -rm(x_no_zero) - -# df_test_missing_and_removed <- df[df$Removed == 1, ] - -# Calculate summary statistics for all strains, including both background and the deletions -x_stats_all <- ddply( - df, - c("conc_num", "conc_num_factor"), - summarise, - N = (length(l)), - mean_l = mean(l, na.rm = TRUE), - median_l = median(l, na.rm = TRUE), - max_l = max(l, na.rm = TRUE), - min_l = min(l, na.rm = TRUE), - sd_l = sd(l, na.rm = TRUE), - se_l = sd_l / sqrt(N - 1), - mean_k = mean(K, na.rm = TRUE), - median_k = median(K, na.rm = TRUE), - max_k = max(K, na.rm = TRUE), - min_k = min(K, na.rm = TRUE), - sd_k = sd(K, na.rm = TRUE), - se_k = sd_k / sqrt(N - 1), - mean_r = mean(r, na.rm = TRUE), - median_r = median(r, na.rm = TRUE), - max_r = max(r, na.rm = TRUE), - min_r = min(r, na.rm = TRUE), - sd_r = sd(r, na.rm = TRUE), - se_r = sd_r / sqrt(N - 1), - mean_auc = mean(AUC, na.rm = TRUE), - median_auc = median(AUC, na.rm = TRUE), - max_auc = max(AUC, na.rm = TRUE), - min_auc = min(AUC, na.rm = TRUE), - sd_auc = sd(AUC, na.rm = TRUE), - se_auc = sd_auc / sqrt(N - 1) -) - -# print(x_stats_all_l) -write.csv(x_stats_all, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE) - -# Part 3 - Generate summary statistics and calculate the max theoretical L value -# Calculate the Z score at each drug conc for each deletion strain - -# Get the background strains - can be modified to take another argument but for most screens will just be YDL227C -background_strains <- c("YDL227C") - -# First part of loop will go through for each background strain -# In most cases there will only be one YDL227C -for (s in background_strains) { - x_background <- df[df$OrfRep == s, ] - - # If there's missing data for the background strains set these values to NA so the 0 values aren't included in summary statistics - # we may want to consider in some cases giving the max high value to L depending on the data type - if (table(x_background$l)[1] == 0) { - x_background[x_background$l == 0, ]$l <- NA - x_background[x_background$K == 0, ]$K <- NA - x_background[x_background$r == 0, ]$r <- NA - x_background[x_background$AUC == 0, ]$AUC <- NA - } - - x_background <- x_background[!is.na(x_background$l), ] - - # Get summary stats for L, K, R, AUC - x_stats_by_l <- ddply( - x_background, - c("OrfRep", "conc_num", "conc_num_factor"), - summarise, - N = (length(l)), - mean = mean(l, na.rm = TRUE), - median = median(l, na.rm = TRUE), - max = max(l, na.rm = TRUE), - min = min(l, na.rm = TRUE), - sd = sd(l, na.rm = TRUE), - se = sd / sqrt(N - 1) - ) - - # print(x_stats_by_l) - x1_sd <- max(x_stats_by_l$sd) - - x_stats_by_k <- ddply( - x_background, - c("OrfRep", "conc_num", "conc_num_factor"), - summarise, - N = (length(K)), - mean = mean(K, na.rm = TRUE), - median = median(K, na.rm = TRUE), - max = max(K, na.rm = TRUE), - min = min(K, na.rm = TRUE), - sd = sd(K, na.rm = TRUE), - se = sd / sqrt(N - 1) - ) - - x1_sd_k <- max(x_stats_by_k$sd) - - x_stats_by_r <- ddply( - x_background, - c("OrfRep", "conc_num", "conc_num_factor"), - summarise, - N = length(r), - mean = mean(r, na.rm = TRUE), - median = median(r, na.rm = TRUE), - max = max(r, na.rm = TRUE), - min = min(r, na.rm = TRUE), - sd = sd(r, na.rm = TRUE), - se = sd / sqrt(N - 1) - ) - - x1_sd_r <- max(x_stats_by_r$sd) - - x_stats_by_auc <- ddply( - x_background, - c("OrfRep", "conc_num", "conc_num_factor"), - summarise, - N = length(AUC), - mean = mean(AUC, na.rm = TRUE), - median = median(AUC, na.rm = TRUE), - max = max(AUC, na.rm = TRUE), - min = min(AUC, na.rm = TRUE), - sd = sd(AUC, na.rm = TRUE), - se = sd / sqrt(N - 1) - ) - - x1_sd_auc <- max(x_stats_by_auc$sd) - - x_stats_by <- ddply( - x_background, - c("OrfRep", "conc_num", "conc_num_factor"), - summarise, - N = (length(l)), - mean_l = mean(l, na.rm = TRUE), - median_l = median(l, na.rm = TRUE), - max_l = max(l, na.rm = TRUE), - min_l = min(l, na.rm = TRUE), - sd_l = sd(l, na.rm = TRUE), - se_l = sd_l / sqrt(N - 1), - mean_k = mean(K, na.rm = TRUE), - median_k = median(K, na.rm = TRUE), - max_k = max(K, na.rm = TRUE), - min_k = min(K, na.rm = TRUE), - sd_k = sd(K, na.rm = TRUE), - se_k = sd_k / sqrt(N - 1), - mean_r = mean(r, na.rm = TRUE), - median_r = median(r, na.rm = TRUE), - max_r = max(r, na.rm = TRUE), - min_r = min(r, na.rm = TRUE), - sd_r = sd(r, na.rm = TRUE), - se_r = sd_r / sqrt(N - 1), - mean_auc = mean(AUC, na.rm = TRUE), - median_auc = median(AUC, na.rm = TRUE), - max_auc = max(AUC, na.rm = TRUE), - min_l = min(AUC, na.rm = TRUE), - sd_auc = sd(AUC, na.rm = TRUE), - se_auc = sd_auc / sqrt(N - 1) - ) - - write.csv(x_stats_by, file.path(out_dir, "summary_stats_background_strains.csv"), row.names = FALSE) - - # Calculate the max theoretical L values - # Only look for max values when K is within 2sd of the ref strain - for (q in unique(df$conc_num_factor)) { - if (q == 0) { - x_within_2sd_k <- - df[df$conc_num_factor == q, ] - x_within_2sd_k <- - x_within_2sd_k[!is.na(x_within_2sd_k$l), ] - x_stats_temp_k <- - x_stats_by_k[x_stats_by_k$conc_num_factor == q, ] - x_within_2sd_k <- - x_within_2sd_k[x_within_2sd_k$K >= (x_stats_temp_k$mean[1] - (2 * x_stats_temp_k$sd[1])), ] - x_within_2sd_k <- - x_within_2sd_k[x_within_2sd_k$K <= (x_stats_temp_k$mean[1] + (2 * x_stats_temp_k$sd[1])), ] - x_outside_2sd_k <- - df[df$conc_num_factor == q, ] - x_outside_2sd_k <- - x_outside_2sd_k[!is.na(x_outside_2sd_k$l), ] - # x_outside_2sd_k_temp <- - # x_stats_by_k[x_stats_by_k$conc_num_factor == q, ] - x_outside_2sd_k <- - x_outside_2sd_k[ - x_outside_2sd_k$K <= (x_stats_temp_k$mean[1] - (2 * x_stats_temp_k$sd[1])) | - x_outside_2sd_k$K >= (x_stats_temp_k$mean[1] + (2 * x_stats_temp_k$sd[1])), ] - # x_outside_2sd_k <- - # x_outside_2sd_k[x_outside_2sd_k$K >= (x_stats_temp_k$mean[1] + (2*x_stats_temp_k$sd[1])), ] - } - if (q > 0) { - x_within_2sd_k_temp <- - df[df$conc_num_factor == q, ] - x_within_2sd_k_temp <- - x_within_2sd_k_temp[!is.na(x_within_2sd_k_temp$l), ] - x_stats_temp_k <- - x_stats_by_k[x_stats_by_k$conc_num_factor == q, ] - x_within_2sd_k_temp <- - x_within_2sd_k_temp[x_within_2sd_k_temp$K >= (x_stats_temp_k$mean[1] - (2 * x_stats_temp_k$sd[1])), ] - x_within_2sd_k_temp <- - x_within_2sd_k_temp[x_within_2sd_k_temp$K <= (x_stats_temp_k$mean[1] + (2 * x_stats_temp_k$sd[1])), ] - x_within_2sd_k <- - rbind(x_within_2sd_k, x_within_2sd_k_temp) - x_outside_2sd_k_temp <- - df[df$conc_num_factor == q, ] - x_outside_2sd_k_temp <- - x_outside_2sd_k_temp[!is.na(x_outside_2sd_k_temp$l), ] - # x_outside_2sd_k_temp <- - # x_stats_by_k[x_stats_by_k$conc_num_factor == q, ] - x_outside_2sd_k_temp <- - x_outside_2sd_k_temp[ - x_outside_2sd_k_temp$K <= (x_stats_temp_k$mean[1] - (2 * x_stats_temp_k$sd[1])) | - x_outside_2sd_k_temp$K >= (x_stats_temp_k$mean[1] + (2 * x_stats_temp_k$sd[1])), ] - # x_outside_2sd_k_temp <- - # x_outside_2sd_k_temp[x_outside_2sd_k_temp$K >= (x_stats_temp_k$mean[1] + (2*x_stats_temp_k$sd[1])) , ] - x_outside_2sd_k <- - rbind(x_outside_2sd_k, x_outside_2sd_k_temp) - } - } - - x_stats_by_l_within_2sd_k <- ddply( - x_within_2sd_k, - c("conc_num", "conc_num_factor"), - summarise, - N = (length(l)), - mean = mean(l), - median = median(l), - max = max(l, na.rm = TRUE), - min = min(l, na.rm = TRUE), - sd = sd(l), - se = sd / sqrt(N - 1), - z_max = (max - mean) / sd - ) - - # print(x_stats_by_l_within_2sd_k) - - x1_sd_within_2sd_k <- max(x_stats_by_l_within_2sd_k$sd) - - write.csv( - x_stats_by_l_within_2sd_k, - file.path(out_dir_qc, "max_observed_l_vals_for_spots_within_2sd_k.csv"), - row.names = FALSE - ) - - x_stats_by_l_outside_2sd_k <- ddply( - x_outside_2sd_k, - c("conc_num", "conc_num_factor"), - summarise, - N = (length(l)), - mean = mean(l), - median = median(l), - max = max(l, na.rm = TRUE), - min = min(l, na.rm = TRUE), - sd = sd(l), - se = sd / sqrt(N - 1) - ) - - # print(x_stats_by_l_outside_2sd_k) - x1_sd_outside_2sd_k <- max(x_stats_by_l_outside_2sd_k$sd) - - # x1_sd_outside_2sd_k <- df[df$l %in% x1_sd_within_2sd_k$l, ] - outside_2sd_k_l_vs_k <- - ggplot(x_outside_2sd_k, aes(l, K, color = as.factor(conc_num))) + - geom_point(aes(ORF = ORF, Gene = Gene, delta_bg = delta_bg), shape = 3) + - ggtitle("Raw L vs K for strains falling outside 2sd of the K mean at each conc") + - theme_publication_legend_right() - - pdf(file.path(out_dir_qc, "raw_l_vs_k_for_strains_2sd_outside_mean_k.pdf"), width = 10, height = 8) - # print(outside_2sd_k_l_vs_k) - - invisible(dev.off()) - - pgg <- ggplotly(outside_2sd_k_l_vs_k) - plotly_path <- file.path(out_dir_qc, "raw_l_vs_k_for_strains_outside_2sd_k.html") - saveWidget(pgg, file = plotly_path, selfcontained = TRUE) - - outside_2sd_k_delta_background_vs_k <- - ggplot(x_outside_2sd_k, aes(delta_bg, K, color = as.factor(conc_num))) + - geom_point(aes(l = l, ORF = ORF, Gene = Gene), shape = 3, position = "jitter") + - ggtitle("DeltaBackground vs K for strains falling outside 2sd of the K mean at each conc") + - theme_publication_legend_right() - - pdf(file.path(out_dir_qc, "delta_background_vs_k_for_strains_2sd_outside_mean_k.pdf"), width = 10, height = 8) - outside_2sd_k_delta_background_vs_k - invisible(dev.off()) - pgg <- ggplotly(outside_2sd_k_delta_background_vs_k) - - # pgg - plotly_path <- file.path(out_dir_qc, "delta_background_vs_k_for_strains_outside_2sd_k.html") - saveWidget(pgg, file = plotly_path, selfcontained = TRUE) - - # Get the background strain mean values at the no drug conc to calculate shift - background_l <- x_stats_by_l$mean[1] - background_k <- x_stats_by_k$mean[1] - background_r <- x_stats_by_r$mean[1] - background_auc <- x_stats_by_auc$mean[1] - - # Create empty plots for plotting element - p_l <- ggplot() - p_k <- ggplot() - p_r <- ggplot() - p_auc <- ggplot() - - p_rf_l <- ggplot() - p_rf_k <- ggplot() - p_rf_r <- ggplot() - p_rf_auc <- ggplot() - - # Get only the deletion strains - df2 <- df - df2 <- df2[df2$OrfRep != "YDL227C", ] - - # If set to max theoretical value, add a 1 to SM, if not, leave as 0 - # SM = Set to Max - df2$SM <- 0 - # Set the missing values to the highest theoretical value at each drug conc for L - # Leave other values as 0 for the max/min - for (i in 1:length(unique(df2$conc_num))) { - concentration <- unique(df2$conc_num)[i] - df2_temp <- df2[df2$conc_num == concentration, ] - if (concentration == 0) { - df2_new <- df2_temp - sprintf("Check loop order, conc = %f", concentration) - } - if (concentration > 0) { - try(df2_temp[df2_temp$l == 0 & !is.na(df2_temp$l), ]$l <- x_stats_by_l_within_2sd_k$max[i]) - try(df2_temp[df2_temp$l >= x_stats_by_l_within_2sd_k$max[i] & !is.na(df2_temp$l), ]$SM <- 1) - try(df2_temp[df2_temp$l >= x_stats_by_l_within_2sd_k$max[i] & !is.na(df2_temp$l), ]$l <- x_stats_by_l_within_2sd_k$max[i]) - # df2_temp[df2_temp$K == 0, ]$K <- x_stats_all_k$max[i] - # df2_temp[df2_temp$r == 0, ]$r <- x_stats_all_r$max[i] - # df2_temp[df2_temp$AUC == 0, ]$AUC <- x_stats_all_auc$max[i] - sprintf("Check loop order, conc = %f", concentration) - df2_new <- rbind(df2_new, df2_temp) - } - } - - df2 <- df2_new - - # Get only the RF strains - df2_rf <- df - df2_rf <- df2_rf[df2_rf$OrfRep == "YDL227C", ] - # If set to max theoretical value, add a 1 to SM, if not, leave as 0 - # SM = Set to Max - df2_rf$SM <- 0 - # Set the missing values to the highest theoretical value at each drug conc for L - # Leave other values as 0 for the max/min - for (i in 1:length(unique(df2_rf$conc_num))) { - concentration <- unique(df2_rf$conc_num)[i] - df2_rf_temp <- df2_rf[df2_rf$conc_num == concentration, ] - if (concentration == 0) { - df2_rf_new <- df2_rf_temp - sprintf("Check loop order, conc = %f", concentration) - } - if (concentration > 0) { - try(df2_rf_temp[df2_rf_temp$l == 0 & !is.na(df2_rf_temp$l), ]$l <- x_stats_by_l_within_2sd_k$max[i]) - try(df2_temp[df2_temp$l >= x_stats_by_l_within_2sd_k$max[i] & !is.na(df2_temp$l), ]$SM <- 1) - try(df2_rf_temp[df2_rf_temp$l >= x_stats_by_l_within_2sd_k$max[i] & !is.na(df2_rf_temp$l), ]$l <- x_stats_by_l_within_2sd_k$max[i]) - sprintf("If error, refs have no L values outside theoretical max L, for REFs, conc = %f", concentration) - df2_rf_new <- rbind(df2_rf_new, df2_rf_temp) - } - } - df2_rf <- df2_rf_new - - # Part 4 Get the RF Z score values - # Change the OrfRep Column to include the RF strain, the Gene name and the Num. so each RF gets its own score - df2_rf$OrfRep <- paste(df2_rf$OrfRep, df2_rf$Gene, df2_rf$Num., sep = "_") - - num_genes_rf <- length(unique(df2_rf$OrfRep)) - # print(num_genes_rf) - - # Create the output data.frame containing columns for each RF strain - interaction_scores_rf <- unique(df2_rf["OrfRep"]) - # interaction_scores_rf$Gene <- unique(df2$Gene) - interaction_scores_rf$Gene <- NA - interaction_scores_rf$raw_shift_l <- NA - interaction_scores_rf$z_shift_l <- NA - interaction_scores_rf$lm_score_l <- NA - interaction_scores_rf$z_lm_l <- NA - interaction_scores_rf$r_squared_l <- NA - interaction_scores_rf$sum_z_score_l <- NA - interaction_scores_rf$avg_zscore_l <- NA - interaction_scores_rf$raw_shift_k <- NA - interaction_scores_rf$z_shift_k <- NA - interaction_scores_rf$lm_score_k <- NA - interaction_scores_rf$z_lm_k <- NA - interaction_scores_rf$r_squared_k <- NA - interaction_scores_rf$sum_z_score_k <- NA - interaction_scores_rf$avg_zscore_k <- NA - interaction_scores_rf$raw_shift_r <- NA - interaction_scores_rf$z_shift_r <- NA - interaction_scores_rf$lm_score_r <- NA - interaction_scores_rf$z_lm_r <- NA - interaction_scores_rf$r_squared_r <- NA - interaction_scores_rf$sum_z_score_r <- NA - interaction_scores_rf$avg_zscore_r <- NA - interaction_scores_rf$raw_shift_auc <- NA - interaction_scores_rf$z_shift_auc <- NA - interaction_scores_rf$lm_score_auc <- NA - interaction_scores_rf$z_lm_auc <- NA - interaction_scores_rf$r_squared_auc <- NA - interaction_scores_rf$sum_z_score_auc <- NA - interaction_scores_rf$avg_zscore_auc <- NA - interaction_scores_rf$NG <- NA - interaction_scores_rf$SM <- NA - - for (i in 1:num_genes_rf) { - # Get each deletion strain ORF - gene_sel <- unique(df2_rf$OrfRep)[i] - # Extract only the current deletion strain and its data - x_gene_sel <- df2_rf[df2_rf$OrfRep == gene_sel, ] - - x_stats_interaction <- ddply( - x_gene_sel, - c("OrfRep", "Gene", "conc_num", "conc_num_factor"), - summarise, - N = (length(l)), - mean_l = mean(l, na.rm = TRUE), - median_l = median(l, na.rm = TRUE), - sd_l = sd(l, na.rm = TRUE), - se_l = sd_l / sqrt(N - 1), - mean_k = mean(K, na.rm = TRUE), - median_k = median(K, na.rm = TRUE), - sd_k = sd(K, na.rm = TRUE), - se_k = sd_k / sqrt(N - 1), - mean_r = mean(r, na.rm = TRUE), - median_r = median(r, na.rm = TRUE), - sd_r = sd(r, na.rm = TRUE), - se_r = sd_r / sqrt(N - 1), - mean_auc = mean(AUC, na.rm = TRUE), - median_auc = median(AUC, na.rm = TRUE), - sd_auc = sd(AUC, na.rm = TRUE), - se_auc = sd_auc / sqrt(N - 1), - NG = sum(NG, na.rm = TRUE), - DB = sum(DB, na.rm = TRUE), - SM = sum(SM, na.rm = TRUE) - ) - - # Get shift vals - # if L is 0, that means the no growth on no drug - # if L is NA at 0, that means the spot was removed due to contamination - # if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift - # otherwise calculate shift at no drug conc - if (is.na(x_stats_interaction$mean_l[1]) || x_stats_interaction$mean_l[1] == 0) { - x_stats_interaction$raw_shift_l <- 0 - x_stats_interaction$raw_shift_k <- 0 - x_stats_interaction$raw_shift_r <- 0 - x_stats_interaction$raw_shift_auc <- 0 - x_stats_interaction$z_shift_l <- 0 - x_stats_interaction$z_shift_k <- 0 - x_stats_interaction$z_shift_r <- 0 - x_stats_interaction$z_shift_auc <- 0 - } else { - x_stats_interaction$raw_shift_l <- x_stats_interaction$mean_l[1] - background_l - x_stats_interaction$raw_shift_k <- x_stats_interaction$mean_k[1] - background_k - x_stats_interaction$raw_shift_r <- x_stats_interaction$mean_r[1] - background_r - x_stats_interaction$raw_shift_auc <- x_stats_interaction$mean_auc[1] - background_auc - x_stats_interaction$z_shift_l <- x_stats_interaction$raw_shift_l[1] / x_stats_by_l$sd[1] - x_stats_interaction$z_shift_k <- x_stats_interaction$raw_shift_k[1] / x_stats_by_k$sd[1] - x_stats_interaction$z_shift_r <- x_stats_interaction$raw_shift_r[1] / x_stats_by_r$sd[1] - x_stats_interaction$z_shift_auc <- x_stats_interaction$raw_shift_auc[1] / x_stats_by_auc$sd[1] - } - - # Get WT vals - x_stats_interaction$WT_l <- x_stats_by_l$mean - x_stats_interaction$WT_k <- x_stats_by_k$mean - x_stats_interaction$WT_r <- x_stats_by_r$mean - x_stats_interaction$WT_auc <- x_stats_by_auc$mean - - # Get WT SD - x_stats_interaction$WT_sd_l <- x_stats_by_l$sd - x_stats_interaction$WT_sd_k <- x_stats_by_k$sd - x_stats_interaction$WT_sd_r <- x_stats_by_r$sd - x_stats_interaction$WT_sd_auc <- x_stats_by_auc$sd - - # Only get scores if there's growth at no drug - if (x_stats_interaction$mean_l[1] != 0 && !is.na(x_stats_interaction$mean_l[1])) { - - # Calculate expected values - x_stats_interaction$Exp_l <- x_stats_interaction$WT_l + x_stats_interaction$raw_shift_l - x_stats_interaction$Exp_k <- x_stats_interaction$WT_k + x_stats_interaction$raw_shift_k - x_stats_interaction$Exp_r <- x_stats_interaction$WT_r + x_stats_interaction$raw_shift_r - x_stats_interaction$Exp_auc <- x_stats_interaction$WT_auc + x_stats_interaction$raw_shift_auc - - # Calculate normalized delta values - x_stats_interaction$delta_l <- x_stats_interaction$mean_l - x_stats_interaction$Exp_l - x_stats_interaction$delta_k <- x_stats_interaction$mean_k - x_stats_interaction$Exp_k - x_stats_interaction$delta_r <- x_stats_interaction$mean_r - x_stats_interaction$Exp_r - x_stats_interaction$delta_auc <- x_stats_interaction$mean_auc - x_stats_interaction$Exp_auc - - # Disregard shift for no growth values in Z score calculation - if (sum(x_stats_interaction$NG, na.rm = TRUE) > 0) { - x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_l <- - x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_l - x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_k <- - x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_k - x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_r <- - x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_r - x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_auc <- - x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_auc - - } - # Disregard shift for set to max values in Z score calculation - if (sum(x_stats_interaction$SM, na.rm = TRUE) > 0) { - x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_l <- - x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_l - # Only calculate the L value without shift since L is the only adjusted value - # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_k <- - # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_k - # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_r <- - # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_r - # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_auc <- - # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_AU - } - - # Calculate Z score at each concentration - x_stats_interaction$zscore_l <- (x_stats_interaction$delta_l) / (x_stats_interaction$WT_sd_l) - x_stats_interaction$zscore_k <- (x_stats_interaction$delta_k) / (x_stats_interaction$WT_sd_k) - x_stats_interaction$zscore_r <- (x_stats_interaction$delta_r) / (x_stats_interaction$WT_sd_r) - x_stats_interaction$zscore_auc <- (x_stats_interaction$delta_auc) / (x_stats_interaction$WT_sd_auc) - - # Get linear model - gene_lm_l <- lm(formula = delta_l ~ conc_num_factor, data = x_stats_interaction) - gene_lm_k <- lm(formula = delta_k ~ conc_num_factor, data = x_stats_interaction) - gene_lm_r <- lm(formula = delta_r ~ conc_num_factor, data = x_stats_interaction) - gene_lm_auc <- lm(formula = delta_auc ~ conc_num_factor, data = x_stats_interaction) - - # Get interaction score calculated by linear model and R-squared value for the fit - gene_interaction_l <- max_conc * (gene_lm_l$coefficients[2]) + gene_lm_l$coefficients[1] - r_squared_l <- summary(gene_lm_l)$r.squared - gene_interaction_k <- max_conc * (gene_lm_k$coefficients[2]) + gene_lm_k$coefficients[1] - r_squared_k <- summary(gene_lm_k)$r.squared - gene_interaction_r <- max_conc * (gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] - r_squared_r <- summary(gene_lm_r)$r.squared - gene_interaction_auc <- max_conc * (gene_lm_r$coefficients[2]) + gene_lm_auc$coefficients[1] - r_squared_auc <- summary(gene_lm_auc)$r.squared - - # Get total of non removed values - num_non_removed_conc <- total_conc_nums - sum(x_stats_interaction$DB, na.rm = TRUE) - 1 - - # Report the scores - interaction_scores_rf$OrfRep[interaction_scores_rf$OrfRep == gene_sel] <- - x_gene_sel$OrfRep[1] - interaction_scores_rf$Gene[interaction_scores_rf$OrfRep == gene_sel] <- - x_gene_sel$Gene[1] - interaction_scores_rf$raw_shift_l[interaction_scores_rf$OrfRep == gene_sel] <- - x_stats_interaction$raw_shift_l[1] - interaction_scores_rf$z_shift_l[interaction_scores_rf$OrfRep == gene_sel] <- - x_stats_interaction$z_shift_l[1] - interaction_scores_rf$lm_score_l[interaction_scores_rf$OrfRep == gene_sel] <- - gene_interaction_l - interaction_scores_rf$r_squared_l[interaction_scores_rf$OrfRep == gene_sel] <- - r_squared_l - interaction_scores_rf$sum_z_score_l[interaction_scores_rf$OrfRep == gene_sel] <- - sum(x_stats_interaction$zscore_l, na.rm = TRUE) - interaction_scores_rf$avg_zscore_l[interaction_scores_rf$OrfRep == gene_sel] <- - sum(x_stats_interaction$zscore_l, na.rm = TRUE) / (num_non_removed_conc) - interaction_scores_rf$raw_shift_k[interaction_scores_rf$OrfRep == gene_sel] <- - x_stats_interaction$raw_shift_k[1] - interaction_scores_rf$z_shift_k[interaction_scores_rf$OrfRep == gene_sel] <- - x_stats_interaction$z_shift_k[1] - interaction_scores_rf$lm_score_k[interaction_scores_rf$OrfRep == gene_sel] <- - gene_interaction_k - interaction_scores_rf$r_squared_k[interaction_scores_rf$OrfRep == gene_sel] <- - r_squared_k - interaction_scores_rf$sum_z_score_k[interaction_scores_rf$OrfRep == gene_sel] <- - sum(x_stats_interaction$zscore_k, na.rm = TRUE) - interaction_scores_rf$avg_zscore_k[interaction_scores_rf$OrfRep == gene_sel] <- - sum(x_stats_interaction$zscore_k, na.rm = TRUE) / (num_non_removed_conc) - interaction_scores_rf$raw_shift_r[interaction_scores_rf$OrfRep == gene_sel] <- - x_stats_interaction$raw_shift_r[1] - interaction_scores_rf$z_shift_r[interaction_scores_rf$OrfRep == gene_sel] <- - x_stats_interaction$z_shift_r[1] - interaction_scores_rf$lm_score_r[interaction_scores_rf$OrfRep == gene_sel] <- - gene_interaction_r - interaction_scores_rf$r_squared_r[interaction_scores_rf$OrfRep == gene_sel] <- - r_squared_r - interaction_scores_rf$sum_z_score_r[interaction_scores_rf$OrfRep == gene_sel] <- - sum(x_stats_interaction$zscore_r, na.rm = TRUE) - interaction_scores_rf$avg_zscore_r[interaction_scores_rf$OrfRep == gene_sel] <- - sum(x_stats_interaction$zscore_r, na.rm = TRUE) / (total_conc_nums - 1) - interaction_scores_rf$raw_shift_auc[interaction_scores_rf$OrfRep == gene_sel] <- - x_stats_interaction$raw_shift_auc[1] - interaction_scores_rf$z_shift_auc[interaction_scores_rf$OrfRep == gene_sel] <- - x_stats_interaction$z_shift_auc[1] - interaction_scores_rf$lm_score_auc[interaction_scores_rf$OrfRep == gene_sel] <- - gene_interaction_auc - interaction_scores_rf$r_squared_auc[interaction_scores_rf$OrfRep == gene_sel] <- - r_squared_auc - interaction_scores_rf$sum_z_score_auc[interaction_scores_rf$OrfRep == gene_sel] <- - sum(x_stats_interaction$zscore_auc, na.rm = TRUE) - interaction_scores_rf$avg_zscore_auc[interaction_scores_rf$OrfRep == gene_sel] <- - sum(x_stats_interaction$zscore_auc, na.rm = TRUE) / (total_conc_nums - 1) - } - - if (x_stats_interaction$mean_l[1] == 0 || is.na(x_stats_interaction$mean_l[1])) { - - # Calculate expected values - x_stats_interaction$Exp_l <- x_stats_interaction$WT_l + x_stats_interaction$raw_shift_l - x_stats_interaction$Exp_k <- x_stats_interaction$WT_k + x_stats_interaction$raw_shift_k - x_stats_interaction$Exp_r <- x_stats_interaction$WT_r + x_stats_interaction$raw_shift_r - x_stats_interaction$Exp_auc <- x_stats_interaction$WT_auc + x_stats_interaction$raw_shift_auc - - # Calculate normalized delta values - x_stats_interaction$delta_l <- x_stats_interaction$mean_l - x_stats_interaction$Exp_l - x_stats_interaction$delta_k <- x_stats_interaction$mean_k - x_stats_interaction$Exp_k - x_stats_interaction$delta_r <- x_stats_interaction$mean_r - x_stats_interaction$Exp_r - x_stats_interaction$delta_auc <- x_stats_interaction$mean_auc - x_stats_interaction$Exp_auc - - # Disregard shift for missing values in Z score calculation - if (sum(x_stats_interaction$NG, na.rm = TRUE) > 0) { - x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_l <- - x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_l - x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_k <- - x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_k - x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_r <- - x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_r - x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_auc <- - x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_auc - } - # Disregard shift for set to max values in Z score calculation - if (sum(x_stats_interaction$SM, na.rm = TRUE) > 0) { - x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_l <- - x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_l - # Only calculate the L value without shift since L is the only adjusted value - # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_k <- - # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_k - # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_r <- - # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_r - # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_auc <- - # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_auc - - } - - # Calculate Z score at each concentration - x_stats_interaction$zscore_l <- (x_stats_interaction$delta_l) / (x_stats_interaction$WT_sd_l) - x_stats_interaction$zscore_k <- (x_stats_interaction$delta_k) / (x_stats_interaction$WT_sd_k) - x_stats_interaction$zscore_r <- (x_stats_interaction$delta_r) / (x_stats_interaction$WT_sd_r) - x_stats_interaction$zscore_auc <- (x_stats_interaction$delta_auc) / (x_stats_interaction$WT_sd_auc) - - # NA values for the next part since there's an NA or 0 at the no drug. - gene_lm_l <- NA - gene_lm_k <- NA - gene_lm_r <- NA - gene_lm_auc <- NA - gene_interaction_l <- NA - r_squared_l <- NA - gene_interaction_k <- NA - r_squared_k <- NA - gene_interaction_r <- NA - r_squared_r <- NA - gene_interaction_auc <- NA - r_squared_auc <- NA - x_stats_interaction$raw_shift_l <- NA - x_stats_interaction$raw_shift_k <- NA - x_stats_interaction$raw_shift_r <- NA - x_stats_interaction$raw_shift_auc <- NA - x_stats_interaction$z_shift_l <- NA - x_stats_interaction$z_shift_k <- NA - x_stats_interaction$z_shift_r <- NA - x_stats_interaction$z_shift_auc <- NA - interaction_scores_rf$OrfRep[interaction_scores_rf$OrfRep == gene_sel] <- x_gene_sel$OrfRep[1] - interaction_scores_rf$Gene[interaction_scores_rf$OrfRep == gene_sel] <- x_gene_sel$Gene[1] - interaction_scores_rf$raw_shift_l[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$raw_shift_l[1] - interaction_scores_rf$z_shift_l[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$z_shift_l[1] - interaction_scores_rf$lm_score_l[interaction_scores_rf$OrfRep == gene_sel] <- gene_interaction_l - interaction_scores_rf$r_squared_l[interaction_scores_rf$OrfRep == gene_sel] <- r_squared_l - interaction_scores_rf$sum_z_score_l[interaction_scores_rf$OrfRep == gene_sel] <- NA - interaction_scores_rf$avg_zscore_l[interaction_scores_rf$OrfRep == gene_sel] <- NA - interaction_scores_rf$raw_shift_k[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$raw_shift_k[1] - interaction_scores_rf$z_shift_k[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$z_shift_k[1] - interaction_scores_rf$lm_score_k[interaction_scores_rf$OrfRep == gene_sel] <- gene_interaction_k - interaction_scores_rf$r_squared_k[interaction_scores_rf$OrfRep == gene_sel] <- r_squared_k - interaction_scores_rf$sum_z_score_k[interaction_scores_rf$OrfRep == gene_sel] <- NA - interaction_scores_rf$avg_zscore_k[interaction_scores_rf$OrfRep == gene_sel] <- NA - interaction_scores_rf$raw_shift_r[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$raw_shift_r[1] - interaction_scores_rf$z_shift_r[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$z_shift_r[1] - interaction_scores_rf$lm_score_r[interaction_scores_rf$OrfRep == gene_sel] <- gene_interaction_r - interaction_scores_rf$r_squared_r[interaction_scores_rf$OrfRep == gene_sel] <- r_squared_r - interaction_scores_rf$sum_z_score_r[interaction_scores_rf$OrfRep == gene_sel] <- NA - interaction_scores_rf$avg_zscore_r[interaction_scores_rf$OrfRep == gene_sel] <- NA - interaction_scores_rf$raw_shift_auc[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$raw_shift_auc[1] - interaction_scores_rf$z_shift_auc[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$z_shift_auc[1] - interaction_scores_rf$lm_score_auc[interaction_scores_rf$OrfRep == gene_sel] <- gene_interaction_auc - interaction_scores_rf$r_squared_auc[interaction_scores_rf$OrfRep == gene_sel] <- r_squared_auc - interaction_scores_rf$sum_z_score_auc[interaction_scores_rf$OrfRep == gene_sel] <- NA - interaction_scores_rf$avg_zscore_auc[interaction_scores_rf$OrfRep == gene_sel] <- NA - } - - if (i == 1) { - x_stats_interaction_all_rf <- x_stats_interaction - } - - if (i > 1) { - x_stats_interaction_all_rf <- rbind(x_stats_interaction_all_rf, x_stats_interaction) - } - - interaction_scores_rf$NG[interaction_scores_rf$OrfRep == gene_sel] <- sum(x_stats_interaction$NG, na.rm = TRUE) - interaction_scores_rf$DB[interaction_scores_rf$OrfRep == gene_sel] <- sum(x_stats_interaction$DB, na.rm = TRUE) - interaction_scores_rf$SM[interaction_scores_rf$OrfRep == gene_sel] <- sum(x_stats_interaction$SM, na.rm = TRUE) - - # x_stats_l_int_temp <- rbind(x_stats_l_int_temp, x_stats_l_int) - } - - print("Pass RF Calculation loop") - - lm_sd_l <- sd(interaction_scores_rf$lm_score_l, na.rm = TRUE) - lm_sd_k <- sd(interaction_scores_rf$lm_score_k, na.rm = TRUE) - lm_sd_r <- sd(interaction_scores_rf$lm_score_r, na.rm = TRUE) - lm_sd_auc <- sd(interaction_scores_rf$lm_score_auc, na.rm = TRUE) - lm_mean_l <- mean(interaction_scores_rf$lm_score_l, na.rm = TRUE) - lm_mean_k <- mean(interaction_scores_rf$lm_score_k, na.rm = TRUE) - lm_mean_r <- mean(interaction_scores_rf$lm_score_r, na.rm = TRUE) - lm_mean_auc <- mean(interaction_scores_rf$lm_score_auc, na.rm = TRUE) - - print(paste("Mean RF linear regression score L", lm_mean_l)) - - interaction_scores_rf$z_lm_l <- (interaction_scores_rf$lm_score_l - lm_mean_l) / (lm_sd_l) - interaction_scores_rf$z_lm_k <- (interaction_scores_rf$lm_score_k - lm_mean_k) / (lm_sd_k) - interaction_scores_rf$z_lm_r <- (interaction_scores_rf$lm_score_r - lm_mean_r) / (lm_sd_r) - interaction_scores_rf$z_lm_auc <- (interaction_scores_rf$lm_score_auc - lm_mean_auc) / (lm_sd_auc) - interaction_scores_rf <- interaction_scores_rf[order(interaction_scores_rf$z_lm_l, decreasing = TRUE), ] - interaction_scores_rf <- interaction_scores_rf[order(interaction_scores_rf$NG, decreasing = TRUE), ] - write.csv(interaction_scores_rf, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE) - - for (i in 1:num_genes_rf) { - gene_sel <- unique(interaction_scores_rf$OrfRep)[i] - x_z_calculations <- x_stats_interaction_all_rf[x_stats_interaction_all_rf$OrfRep == gene_sel, ] - df_int_scores <- interaction_scores_rf[interaction_scores_rf$OrfRep == gene_sel, ] - - p_rf_l[[i]] <- ggplot(x_z_calculations, aes(conc_num_factor, delta_l)) + - geom_point() + - geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + - coord_cartesian(ylim = c(-65, 65)) + - geom_errorbar(aes(ymin = 0 - (2 * WT_sd_l), ymax = 0 + (2 * WT_sd_l)), alpha = 0.3) + - ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) + - annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$z_shift_l, 2))) + - scale_color_discrete(guide = FALSE) + - # annotate("text", x = 1, y = 35, label = paste("Avg Zscore =", round(df_int_scores$avg_zscore_l, 2))) + - annotate("text", x = 1, y = 25, label = paste("lm Zscore =", round(df_int_scores$z_lm_l, 2))) + - annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) + - annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) + - annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) + - scale_x_continuous( - name = unique(df$Drug[1]), - breaks = unique(x_z_calculations$conc_num_factor), - labels = unique(as.character(x_z_calculations$conc_num)) - ) + - scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) + - theme_publication() - - p_rf_k[[i]] <- ggplot( - x_z_calculations, aes(conc_num_factor, delta_k)) + - geom_point() + - geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + - coord_cartesian(ylim = c(-65, 65)) + - geom_errorbar(aes(ymin = 0 - (2 * WT_sd_k), ymax = 0 + (2 * WT_sd_k)), alpha = 0.3) + - ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) + - annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$z_shift_k, 2))) + - scale_color_discrete(guide = FALSE) + - # annotate("text", x = 1, y = 35, label = paste("Avg ZScore =", round(df_int_scores$avg_zscore_k, 2))) + - annotate("text", x = 1, y = 25, label = paste("lm ZScore =", round(df_int_scores$z_lm_l, 2))) + - annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) + - annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) + - annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) + - scale_x_continuous( - name = unique(df$Drug[1]), - breaks = unique(x_z_calculations$conc_num_factor), - labels = unique(as.character(x_z_calculations$conc_num)) - ) + - scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) + - theme_publication() - - p_rf_r[[i]] <- ggplot( - x_z_calculations, aes(conc_num_factor, delta_r)) + - geom_point() + - geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + - coord_cartesian(ylim = c(-0.65, 0.65)) + - geom_errorbar(aes(ymin = 0 - (2 * WT_sd_r), ymax = 0 + (2 * WT_sd_r)), alpha = 0.3) + - ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) + - annotate("text", x = 1, y = 0.45, label = paste("ZShift =", round(df_int_scores$z_shift_r, 2))) + - scale_color_discrete(guide = FALSE) + - # annotate("text", x = 1, y = 0.35, label = paste("Avg ZScore =", round(df_int_scores$avg_zscore_r, 2))) + - annotate("text", x = 1, y = 0.25, label = paste("lm ZScore =", round(df_int_scores$z_lm_r, 2))) + - annotate("text", x = 1, y = -0.25, label = paste("NG =", df_int_scores$NG)) + - annotate("text", x = 1, y = -0.35, label = paste("DB =", df_int_scores$DB)) + - annotate("text", x = 1, y = -0.45, label = paste("SM =", df_int_scores$SM)) + - scale_x_continuous( - name = unique(df$Drug[1]), - breaks = unique(x_z_calculations$conc_num_factor), - labels = unique(as.character(x_z_calculations$conc_num)) - ) + - scale_y_continuous(breaks = c(-0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6)) + - theme_publication() - - p_rf_auc[[i]] <- ggplot( - x_z_calculations, aes(conc_num_factor, delta_auc)) + - geom_point() + - geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + - coord_cartesian(ylim = c(-6500, 6500)) + - geom_errorbar(aes(ymin = 0 - (2 * WT_sd_auc), ymax = 0 + (2 * WT_sd_auc)), alpha = 0.3) + - ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) + - annotate("text", x = 1, y = 4500, label = paste("ZShift =", round(df_int_scores$z_shift_auc, 2))) + - scale_color_discrete(guide = FALSE) + - # annotate("text", x = 1, y = 3500, label = paste("Avg ZScore =", round(df_int_scores$avg_zscore_auc, 2))) + - annotate("text", x = 1, y = 2500, label = paste("lm ZScore =", round(df_int_scores$z_lm_auc, 2))) + - annotate("text", x = 1, y = -2500, label = paste("NG =", df_int_scores$NG)) + - annotate("text", x = 1, y = -3500, label = paste("DB =", df_int_scores$DB)) + - annotate("text", x = 1, y = -4500, label = paste("SM =", df_int_scores$SM)) + - scale_x_continuous( - name = unique(df$Drug[1]), - breaks = unique(x_z_calculations$conc_num_factor), - labels = unique(as.character(x_z_calculations$conc_num)) - ) + - scale_y_continuous(breaks = c(-6000, -5000, -4000, -3000, -2000, -1000, 0, 1000, 2000, 3000, 4000, 5000, 6000)) + - theme_publication() - - if (i == 1) { - x_stats_interaction_all_rf_final <- x_z_calculations - } - if (i > 1) { - x_stats_interaction_all_rf_final <- rbind(x_stats_interaction_all_rf_final, x_z_calculations) - } - } - print("Pass RF ggplot loop") - write.csv(x_stats_interaction_all_rf_final, file.path(out_dir, "rf_zscore_calculations.csv"), row.names = FALSE) - - # Part 5 - Get Zscores for Gene deletion strains - - # Get total number of genes for the next loop - num_genes <- length(unique(df2$OrfRep)) - # print(num_genes) - - # Create the output data.frame containing columns for each deletion strain - interaction_scores <- unique(df2["OrfRep"]) - # interaction_scores$Gene <- unique(df2$Gene) - interaction_scores$Gene <- NA - interaction_scores$raw_shift_l <- NA - interaction_scores$z_shift_l <- NA - interaction_scores$lm_score_l <- NA - interaction_scores$z_lm_l <- NA - interaction_scores$r_squared_l <- NA - interaction_scores$sum_z_score_l <- NA - interaction_scores$avg_zscore_l <- NA - interaction_scores$raw_shift_k <- NA - interaction_scores$z_shift_k <- NA - interaction_scores$lm_score_k <- NA - interaction_scores$z_lm_k <- NA - interaction_scores$r_squared_k <- NA - interaction_scores$sum_z_score_k <- NA - interaction_scores$avg_zscore_k <- NA - interaction_scores$raw_shift_r <- NA - interaction_scores$z_shift_r <- NA - interaction_scores$lm_score_r <- NA - interaction_scores$z_lm_r <- NA - interaction_scores$r_squared_r <- NA - interaction_scores$sum_z_score_r <- NA - interaction_scores$avg_zscore_r <- NA - interaction_scores$raw_shift_auc <- NA - interaction_scores$z_shift_auc <- NA - interaction_scores$lm_score_auc <- NA - interaction_scores$z_lm_auc <- NA - interaction_scores$r_squared_auc <- NA - interaction_scores$sum_z_score_auc <- NA - interaction_scores$avg_zscore_auc <- NA - interaction_scores$NG <- NA - interaction_scores$DB <- NA - interaction_scores$SM <- NA - - for (i in 1:num_genes) { - # Get each deletion strain ORF - gene_sel <- unique(df2$OrfRep)[i] - # Extract only the current deletion strain and its data - x_gene_sel <- df2[df2$OrfRep == gene_sel, ] - - x_stats_interaction <- ddply( - x_gene_sel, - c("OrfRep", "Gene", "conc_num", "conc_num_factor"), - summarise, - N = (length(l)), - mean_l = mean(l, na.rm = TRUE), - median_l = median(l, na.rm = TRUE), - sd_l = sd(l, na.rm = TRUE), - se_l = sd_l / sqrt(N - 1), - mean_k = mean(K, na.rm = TRUE), - median_k = median(K, na.rm = TRUE), - sd_k = sd(K, na.rm = TRUE), - se_k = sd_k / sqrt(N - 1), - mean_r = mean(r, na.rm = TRUE), - median_r = median(r, na.rm = TRUE), - sd_r = sd(r, na.rm = TRUE), - se_r = sd_r / sqrt(N - 1), - mean_auc = mean(AUC, na.rm = TRUE), - median_auc = median(AUC, na.rm = TRUE), - sd_auc = sd(AUC, na.rm = TRUE), - se_auc = sd_auc / sqrt(N - 1), - NG = sum(NG, na.rm = TRUE), - DB = sum(DB, na.rm = TRUE), - SM = sum(SM, na.rm = TRUE) - ) - - # Get shift vals - # if L is 0, that means the no growth on no drug - # if L is NA at 0, that means the spot was removed due to contamination - # if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift - # otherwise calculate shift at no drug conc - if (is.na(x_stats_interaction$mean_l[1]) || x_stats_interaction$mean_l[1] == 0) { - x_stats_interaction$raw_shift_l <- 0 - x_stats_interaction$raw_shift_k <- 0 - x_stats_interaction$raw_shift_r <- 0 - x_stats_interaction$raw_shift_auc <- 0 - x_stats_interaction$z_shift_l <- 0 - x_stats_interaction$z_shift_k <- 0 - x_stats_interaction$z_shift_r <- 0 - x_stats_interaction$z_shift_auc <- 0 - } else { - x_stats_interaction$raw_shift_l <- x_stats_interaction$mean_l[1] - background_l - x_stats_interaction$raw_shift_k <- x_stats_interaction$mean_k[1] - background_k - x_stats_interaction$raw_shift_r <- x_stats_interaction$mean_r[1] - background_r - x_stats_interaction$raw_shift_auc <- x_stats_interaction$mean_auc[1] - background_auc - x_stats_interaction$z_shift_l <- x_stats_interaction$raw_shift_l[1] / x_stats_by_l$sd[1] - x_stats_interaction$z_shift_k <- x_stats_interaction$raw_shift_k[1] / x_stats_by_k$sd[1] - x_stats_interaction$z_shift_r <- x_stats_interaction$raw_shift_r[1] / x_stats_by_r$sd[1] - x_stats_interaction$z_shift_auc <- x_stats_interaction$raw_shift_auc[1] / x_stats_by_auc$sd[1] - } - - # Get WT vals - x_stats_interaction$WT_l <- x_stats_by_l$mean - x_stats_interaction$WT_k <- x_stats_by_k$mean - x_stats_interaction$WT_r <- x_stats_by_r$mean - x_stats_interaction$WT_auc <- x_stats_by_auc$mean - - # Get WT SD - x_stats_interaction$WT_sd_l <- x_stats_by_l$sd - x_stats_interaction$WT_sd_k <- x_stats_by_k$sd - x_stats_interaction$WT_sd_r <- x_stats_by_r$sd - x_stats_interaction$WT_sd_auc <- x_stats_by_auc$sd - - # Only get scores if there's growth at no drug - if (x_stats_interaction$mean_l[1] != 0 && !is.na(x_stats_interaction$mean_l[1])) { - - # Calculate expected values - x_stats_interaction$Exp_l <- x_stats_interaction$WT_l + x_stats_interaction$raw_shift_l - x_stats_interaction$Exp_k <- x_stats_interaction$WT_k + x_stats_interaction$raw_shift_k - x_stats_interaction$Exp_r <- x_stats_interaction$WT_r + x_stats_interaction$raw_shift_r - x_stats_interaction$Exp_auc <- x_stats_interaction$WT_auc + x_stats_interaction$raw_shift_auc - - # Calculate normalized delta values - x_stats_interaction$delta_l <- x_stats_interaction$mean_l - x_stats_interaction$Exp_l - x_stats_interaction$delta_k <- x_stats_interaction$mean_k - x_stats_interaction$Exp_k - x_stats_interaction$delta_r <- x_stats_interaction$mean_r - x_stats_interaction$Exp_r - x_stats_interaction$delta_auc <- x_stats_interaction$mean_auc - x_stats_interaction$Exp_auc - - # Disregard shift for no growth values in Z score calculation - if (sum(x_stats_interaction$NG, na.rm = TRUE) > 0) { - x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_l <- - x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_l - x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_k <- - x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_k - x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_r <- - x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_r - x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_auc <- - x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_auc - } - - # Disregard shift for set to max values in Z score calculation - if (sum(x_stats_interaction$SM, na.rm = TRUE) > 0) { - x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_l <- - x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_l - # Only calculate the L value without shift since L is the only adjusted value - # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_k <- - # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_k - # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_r <- - # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_r - # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_auc <- - # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_auc - } - - # Calculate Z score at each concentration - x_stats_interaction$zscore_l <- (x_stats_interaction$delta_l) / (x_stats_interaction$WT_sd_l) - x_stats_interaction$zscore_k <- (x_stats_interaction$delta_k) / (x_stats_interaction$WT_sd_k) - x_stats_interaction$zscore_r <- (x_stats_interaction$delta_r) / (x_stats_interaction$WT_sd_r) - x_stats_interaction$zscore_auc <- (x_stats_interaction$delta_auc) / (x_stats_interaction$WT_sd_auc) - - # Get linear model - gene_lm_l <- lm(formula = delta_l ~ conc_num_factor, data = x_stats_interaction) - gene_lm_k <- lm(formula = delta_k ~ conc_num_factor, data = x_stats_interaction) - gene_lm_r <- lm(formula = delta_r ~ conc_num_factor, data = x_stats_interaction) - gene_lm_auc <- lm(formula = delta_auc ~ conc_num_factor, data = x_stats_interaction) - - # Get interaction score calculated by linear model and R-squared value for the fit - gene_interaction_l <- max_conc * (gene_lm_l$coefficients[2]) + gene_lm_l$coefficients[1] - r_squared_l <- summary(gene_lm_l)$r.squared - gene_interaction_k <- max_conc * (gene_lm_k$coefficients[2]) + gene_lm_k$coefficients[1] - r_squared_k <- summary(gene_lm_k)$r.squared - gene_interaction_r <- max_conc * (gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] - r_squared_r <- summary(gene_lm_r)$r.squared - gene_interaction_auc <- max_conc * (gene_lm_r$coefficients[2]) + gene_lm_auc$coefficients[1] - r_squared_auc <- summary(gene_lm_auc)$r.squared - - # Get total of non removed values - num_non_removed_conc <- total_conc_nums - sum(x_stats_interaction$DB, na.rm = TRUE) - 1 - - # Report the scores - interaction_scores$OrfRep[interaction_scores$OrfRep == gene_sel] <- - as.character(x_gene_sel$OrfRep[1]) - interaction_scores$Gene[interaction_scores$OrfRep == gene_sel] <- - as.character(x_gene_sel$Gene[1]) - interaction_scores$raw_shift_l[interaction_scores$OrfRep == gene_sel] <- - x_stats_interaction$raw_shift_l[1] - interaction_scores$z_shift_l[interaction_scores$OrfRep == gene_sel] <- - x_stats_interaction$z_shift_l[1] - interaction_scores$lm_score_l[interaction_scores$OrfRep == gene_sel] <- - gene_interaction_l - interaction_scores$z_lm_l[interaction_scores$OrfRep == gene_sel] <- - (gene_interaction_l - lm_mean_l) / lm_sd_l - interaction_scores$r_squared_l[interaction_scores$OrfRep == gene_sel] <- - r_squared_l - interaction_scores$sum_z_score_l[interaction_scores$OrfRep == gene_sel] <- - sum(x_stats_interaction$zscore_l, na.rm = TRUE) - interaction_scores$avg_zscore_l[interaction_scores$OrfRep == gene_sel] <- - sum(x_stats_interaction$zscore_l, na.rm = TRUE) / (num_non_removed_conc) - interaction_scores$raw_shift_k[interaction_scores$OrfRep == gene_sel] <- - x_stats_interaction$raw_shift_k[1] - interaction_scores$z_shift_k[interaction_scores$OrfRep == gene_sel] <- - x_stats_interaction$z_shift_k[1] - interaction_scores$lm_score_k[interaction_scores$OrfRep == gene_sel] <- - gene_interaction_k - interaction_scores$z_lm_k[interaction_scores$OrfRep == gene_sel] <- - (gene_interaction_k - lm_mean_k) / lm_sd_k - interaction_scores$r_squared_k[interaction_scores$OrfRep == gene_sel] <- - r_squared_k - interaction_scores$sum_z_score_k[interaction_scores$OrfRep == gene_sel] <- - sum(x_stats_interaction$zscore_k, na.rm = TRUE) - interaction_scores$avg_zscore_k[interaction_scores$OrfRep == gene_sel] <- - sum(x_stats_interaction$zscore_k, na.rm = TRUE) / (num_non_removed_conc) - interaction_scores$raw_shift_r[interaction_scores$OrfRep == gene_sel] <- - x_stats_interaction$raw_shift_r[1] - interaction_scores$z_shift_r[interaction_scores$OrfRep == gene_sel] <- - x_stats_interaction$z_shift_r[1] - interaction_scores$lm_score_r[interaction_scores$OrfRep == gene_sel] <- - gene_interaction_r - interaction_scores$z_lm_r[interaction_scores$OrfRep == gene_sel] <- - (gene_interaction_r - lm_mean_r) / lm_sd_r - interaction_scores$r_squared_r[interaction_scores$OrfRep == gene_sel] <- - r_squared_r - interaction_scores$sum_z_score_r[interaction_scores$OrfRep == gene_sel] <- - sum(x_stats_interaction$zscore_r, na.rm = TRUE) - interaction_scores$avg_zscore_r[interaction_scores$OrfRep == gene_sel] <- - sum(x_stats_interaction$zscore_r, na.rm = TRUE) / (total_conc_nums - 1) - interaction_scores$raw_shift_auc[interaction_scores$OrfRep == gene_sel] <- - x_stats_interaction$raw_shift_auc[1] - interaction_scores$z_shift_auc[interaction_scores$OrfRep == gene_sel] <- - x_stats_interaction$z_shift_auc[1] - interaction_scores$lm_score_auc[interaction_scores$OrfRep == gene_sel] <- - gene_interaction_auc - interaction_scores$z_lm_auc[interaction_scores$OrfRep == gene_sel] <- - (gene_interaction_auc - lm_mean_auc) / lm_sd_auc - interaction_scores$r_squared_auc[interaction_scores$OrfRep == gene_sel] <- - r_squared_auc - interaction_scores$sum_z_score_auc[interaction_scores$OrfRep == gene_sel] <- - sum(x_stats_interaction$zscore_auc, na.rm = TRUE) - interaction_scores$avg_zscore_auc[interaction_scores$OrfRep == gene_sel] <- - sum(x_stats_interaction$zscore_auc, na.rm = TRUE) / (total_conc_nums - 1) - } - - if (x_stats_interaction$mean_l[1] == 0 || is.na(x_stats_interaction$mean_l[1])) { - - # Calculate expected values - x_stats_interaction$Exp_l <- x_stats_interaction$WT_l + x_stats_interaction$raw_shift_l - x_stats_interaction$Exp_k <- x_stats_interaction$WT_k + x_stats_interaction$raw_shift_k - x_stats_interaction$Exp_r <- x_stats_interaction$WT_r + x_stats_interaction$raw_shift_r - x_stats_interaction$Exp_auc <- x_stats_interaction$WT_auc + x_stats_interaction$raw_shift_auc - - # Calculate normalized delta values - x_stats_interaction$delta_l <- x_stats_interaction$mean_l - x_stats_interaction$Exp_l - x_stats_interaction$delta_k <- x_stats_interaction$mean_k - x_stats_interaction$Exp_k - x_stats_interaction$delta_r <- x_stats_interaction$mean_r - x_stats_interaction$Exp_r - x_stats_interaction$delta_auc <- x_stats_interaction$mean_auc - x_stats_interaction$Exp_auc - - # Disregard shift for missing values in Z score calculatiom - if (sum(x_stats_interaction$NG, na.rm = TRUE) > 0) { - x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_l <- - x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_l - x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_k <- - x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_k - x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_r <- - x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_r - x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_auc <- - x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_auc - } - - # Disregard shift for set to max values in Z score calculation - if (sum(x_stats_interaction$SM, na.rm = TRUE) > 0) { - x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_l <- - x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_l - - # Only calculate the L value without shift since L is the only adjusted value - # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_k <- - # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_k - # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_r <- - # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_r - # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_auc <- - # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_auc - } - - # Calculate Z score at each concentration - x_stats_interaction$zscore_l <- (x_stats_interaction$delta_l) / (x_stats_interaction$WT_sd_l) - x_stats_interaction$zscore_k <- (x_stats_interaction$delta_k) / (x_stats_interaction$WT_sd_k) - x_stats_interaction$zscore_r <- (x_stats_interaction$delta_r) / (x_stats_interaction$WT_sd_r) - x_stats_interaction$zscore_auc <- (x_stats_interaction$delta_auc) / (x_stats_interaction$WT_sd_auc) - - # NA values for the next part since there's an NA or 0 at the no drug. - gene_lm_l <- NA - gene_lm_k <- NA - gene_lm_r <- NA - gene_interaction_l <- NA - r_squared_l <- NA - gene_interaction_k <- NA - r_squared_k <- NA - gene_interaction_r <- NA - r_squared_r <- NA - x_stats_interaction$raw_shift_l <- NA - x_stats_interaction$raw_shift_k <- NA - x_stats_interaction$raw_shift_r <- NA - x_stats_interaction$raw_shift_auc <- NA - x_stats_interaction$z_shift_l <- NA - x_stats_interaction$z_shift_k <- NA - x_stats_interaction$z_shift_r <- NA - x_stats_interaction$z_shift_auc <- NA - interaction_scores$OrfRep[interaction_scores$OrfRep == gene_sel] <- as.character(x_gene_sel$OrfRep[1]) - interaction_scores$Gene[interaction_scores$OrfRep == gene_sel] <- as.character(x_gene_sel$Gene[1]) - interaction_scores$raw_shift_l[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$raw_shift_l[1] - interaction_scores$z_shift_l[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$z_shift_l[1] - interaction_scores$lm_score_l[interaction_scores$OrfRep == gene_sel] <- NA - interaction_scores$z_lm_l[interaction_scores$OrfRep == gene_sel] <- NA - interaction_scores$r_squared_l[interaction_scores$OrfRep == gene_sel] <- r_squared_l - interaction_scores$sum_z_score_l[interaction_scores$OrfRep == gene_sel] <- NA - interaction_scores$avg_zscore_l[interaction_scores$OrfRep == gene_sel] <- NA - interaction_scores$raw_shift_k[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$raw_shift_k[1] - interaction_scores$z_shift_k[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$z_shift_k[1] - interaction_scores$lm_score_k[interaction_scores$OrfRep == gene_sel] <- NA - interaction_scores$z_lm_k[interaction_scores$OrfRep == gene_sel] <- NA - interaction_scores$r_squared_k[interaction_scores$OrfRep == gene_sel] <- r_squared_k - interaction_scores$sum_z_score_k[interaction_scores$OrfRep == gene_sel] <- NA - interaction_scores$avg_zscore_k[interaction_scores$OrfRep == gene_sel] <- NA - interaction_scores$raw_shift_r[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$raw_shift_r[1] - interaction_scores$z_shift_r[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$z_shift_r[1] - interaction_scores$lm_score_r[interaction_scores$OrfRep == gene_sel] <- NA - interaction_scores$z_lm_r[interaction_scores$OrfRep == gene_sel] <- NA - interaction_scores$r_squared_r[interaction_scores$OrfRep == gene_sel] <- r_squared_r - interaction_scores$sum_z_score_r[interaction_scores$OrfRep == gene_sel] <- NA - interaction_scores$avg_zscore_r[interaction_scores$OrfRep == gene_sel] <- NA - interaction_scores$raw_shift_auc[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$raw_shift_auc[1] - interaction_scores$z_shift_auc[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$z_shift_auc[1] - interaction_scores$lm_score_auc[interaction_scores$OrfRep == gene_sel] <- NA - interaction_scores$z_lm_auc[interaction_scores$OrfRep == gene_sel] <- NA - interaction_scores$r_squared_auc[interaction_scores$OrfRep == gene_sel] <- r_squared_auc - interaction_scores$sum_z_score_auc[interaction_scores$OrfRep == gene_sel] <- NA - interaction_scores$avg_zscore_auc[interaction_scores$OrfRep == gene_sel] <- NA - } - - if (i == 1) { - x_stats_interaction_all <- x_stats_interaction - } - if (i > 1) { - x_stats_interaction_all <- rbind(x_stats_interaction_all, x_stats_interaction) - } - - interaction_scores$NG[interaction_scores$OrfRep == gene_sel] <- sum(x_stats_interaction$NG, na.rm = TRUE) - interaction_scores$DB[interaction_scores$OrfRep == gene_sel] <- sum(x_stats_interaction$DB, na.rm = TRUE) - interaction_scores$SM[interaction_scores$OrfRep == gene_sel] <- sum(x_stats_interaction$SM, na.rm = TRUE) - - # x_stats_l_int_temp <- rbind(x_stats_l_int_temp, x_stats_l_int) - } - - print("Pass Int Calculation loop") - interaction_scores <- interaction_scores[order(interaction_scores$z_lm_l, decreasing = TRUE), ] - interaction_scores <- interaction_scores[order(interaction_scores$NG, decreasing = TRUE), ] - df_order_by_OrfRep <- unique(interaction_scores$OrfRep) - # x_stats_interaction_all <- x_stats_interaction_all[order(x_stats_interaction_all$NG, decreasing = TRUE), ] - write.csv(interaction_scores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE) - - interaction_scores_deletion_enhancers_l <- - interaction_scores[interaction_scores$avg_zscore_l >= 2, ] - interaction_scores_deletion_enhancers_k <- - interaction_scores[interaction_scores$avg_zscore_k <= -2, ] - interaction_scores_deletion_suppressors_l <- - interaction_scores[interaction_scores$avg_zscore_l <= -2, ] - interaction_scores_deletion_suppressors_k <- - interaction_scores[interaction_scores$avg_zscore_k >= 2, ] - interaction_scores_deletion_enhancers_and_suppressors_l <- - interaction_scores[interaction_scores$avg_zscore_l >= 2 | interaction_scores$avg_zscore_l <= -2, ] - interaction_scores_deletion_enhancers_and_suppressors_k <- - interaction_scores[interaction_scores$avg_zscore_k >= 2 | interaction_scores$avg_zscore_k <= -2, ] - interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_l <- - interaction_scores[interaction_scores$z_lm_l >= 2 & interaction_scores$avg_zscore_l <= -2, ] - interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_l <- - interaction_scores[interaction_scores$z_lm_l <= -2 & interaction_scores$avg_zscore_l >= 2, ] - interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_k <- - interaction_scores[interaction_scores$z_lm_k <= -2 & interaction_scores$avg_zscore_k >= 2, ] - interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_k <- - interaction_scores[interaction_scores$z_lm_k >= 2 & interaction_scores$avg_zscore_k <= -2, ] - interaction_scores_deletion_enhancers_l <- - interaction_scores_deletion_enhancers_l[ - !is.na(interaction_scores_deletion_enhancers_l$OrfRep), ] - interaction_scores_deletion_enhancers_k <- - interaction_scores_deletion_enhancers_k[ - !is.na(interaction_scores_deletion_enhancers_k$OrfRep), ] - interaction_scores_deletion_suppressors_l <- - interaction_scores_deletion_suppressors_l[ - !is.na(interaction_scores_deletion_suppressors_l$OrfRep), ] - interaction_scores_deletion_suppressors_k <- - interaction_scores_deletion_suppressors_k[ - !is.na(interaction_scores_deletion_suppressors_k$OrfRep), ] - interaction_scores_deletion_enhancers_and_suppressors_l <- - interaction_scores_deletion_enhancers_and_suppressors_l[ - !is.na(interaction_scores_deletion_enhancers_and_suppressors_l$OrfRep), ] - interaction_scores_deletion_enhancers_and_suppressors_k <- - interaction_scores_deletion_enhancers_and_suppressors_k[ - !is.na(interaction_scores_deletion_enhancers_and_suppressors_k$OrfRep), ] - interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_l <- - interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_l[ - !is.na(interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_l$OrfRep), ] - interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_l <- - interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_l[ - !is.na(interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_l$OrfRep), ] - interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_k <- - interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_k[ - !is.na(interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_k$OrfRep), ] - interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_k <- - interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_k[ - !is.na(interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_k$OrfRep), ] - write.csv(interaction_scores_deletion_enhancers_l, - file.path(out_dir, "zscores_interaction_deletion_enhancers_l.csv"), row.names = FALSE) - write.csv(interaction_scores_deletion_enhancers_k, - file.path(out_dir, "zscores_interaction_deletion_enhancers_k.csv"), row.names = FALSE) - write.csv(interaction_scores_deletion_suppressors_l, - file.path(out_dir, "zscores_interaction_deletion_suppressors_l.csv"), row.names = FALSE) - write.csv(interaction_scores_deletion_suppressors_k, - file.path(out_dir, "zscores_interaction_deletion_suppressors_k.csv"), row.names = FALSE) - write.csv(interaction_scores_deletion_enhancers_and_suppressors_l, - file.path(out_dir, "zscores_interaction_deletion_enhancers_and_suppressors_l.csv"), row.names = FALSE) - write.csv(interaction_scores_deletion_enhancers_and_suppressors_k, - file.path(out_dir, "zscores_interaction_deletion_enhancers_and_suppressors_k.csv"), row.names = FALSE) - write.csv(interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_l, - file.path(out_dir, "zscores_interaction_suppressors_and_lm_enhancers_l.csv"), row.names = FALSE) - write.csv(interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_l, - file.path(out_dir, "zscores_interaction_enhancers_and_lm_suppressors_l.csv"), row.names = FALSE) - write.csv(interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_k, - file.path(out_dir, "zscores_interaction_suppressors_and_lm_enhancers_k.csv"), row.names = FALSE) - write.csv(interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_k, - file.path(out_dir, "zscores_interaction_enhancers_and_lm_suppressors_k.csv"), row.names = FALSE) - - # Get enhancers and suppressors for linear regression - interaction_scores_deletion_enhancers_l_lm <- - interaction_scores[interaction_scores$z_lm_l >= 2, ] - interaction_scores_deletion_enhancers_k_lm <- - interaction_scores[interaction_scores$z_lm_k <= -2, ] - interaction_scores_deletion_suppressors_l_lm <- - interaction_scores[interaction_scores$z_lm_l <= -2, ] - interaction_scores_deletion_suppressors_k_lm <- - interaction_scores[interaction_scores$z_lm_k >= 2, ] - interaction_scores_deletion_enhancers_and_suppressors_l_lm <- - interaction_scores[interaction_scores$z_lm_l >= 2 | interaction_scores$z_lm_l <= -2, ] - interaction_scores_deletion_enhancers_and_suppressors_k_lm <- - interaction_scores[interaction_scores$z_lm_k >= 2 | interaction_scores$z_lm_k <= -2, ] - interaction_scores_deletion_enhancers_l_lm <- - interaction_scores_deletion_enhancers_l_lm[ - !is.na(interaction_scores_deletion_enhancers_l_lm$OrfRep), ] - interaction_scores_deletion_enhancers_k_lm <- - interaction_scores_deletion_enhancers_k_lm[ - !is.na(interaction_scores_deletion_enhancers_k_lm$OrfRep), ] - interaction_scores_deletion_suppressors_l_lm <- - interaction_scores_deletion_suppressors_l_lm[ - !is.na(interaction_scores_deletion_suppressors_l_lm$OrfRep), ] - interaction_scores_deletion_suppressors_k_lm <- - interaction_scores_deletion_suppressors_k_lm[ - !is.na(interaction_scores_deletion_suppressors_k_lm$OrfRep), ] - interaction_scores_deletion_enhancers_and_suppressors_l_lm <- - interaction_scores_deletion_enhancers_and_suppressors_l_lm[ - !is.na(interaction_scores_deletion_enhancers_and_suppressors_l_lm$OrfRep), ] - interaction_scores_deletion_enhancers_and_suppressors_k_lm <- - interaction_scores_deletion_enhancers_and_suppressors_k_lm[ - !is.na(interaction_scores_deletion_enhancers_and_suppressors_k_lm$OrfRep), ] - - write.csv(interaction_scores_deletion_enhancers_l_lm, - file.path(out_dir, "zscores_interaction_deletion_enhancers_l_lm.csv"), row.names = FALSE) - write.csv(interaction_scores_deletion_enhancers_k_lm, - file.path(out_dir, "zscores_interaction_deletion_enhancers_k_lm.csv"), row.names = FALSE) - write.csv(interaction_scores_deletion_suppressors_l_lm, - file.path(out_dir, "zscores_interaction_deletion_suppressors_l_lm.csv"), row.names = FALSE) - write.csv(interaction_scores_deletion_suppressors_k_lm, - file.path(out_dir, "zscores_interaction_deletion_suppressors_k_lm.csv"), row.names = FALSE) - write.csv(interaction_scores_deletion_enhancers_and_suppressors_l_lm, - file.path(out_dir, "zscores_interaction_deletion_enhancers_and_suppressors_l_lm.csv"), row.names = FALSE) - write.csv(interaction_scores_deletion_enhancers_and_suppressors_k_lm, - file.path(out_dir, "zscores_interaction_deletion_enhancers_and_suppressors_k_lm.csv"), row.names = FALSE) - # write.csv(Labels, study_info_file, row.names = FALSE) - # write.table(Labels, file.path("../Code/StudyInfo.txt"), sep = "\t", row.names = FALSE) - - for (i in 1:num_genes) { - gene_sel <- unique(interaction_scores$OrfRep)[i] - x_z_calculations <- x_stats_interaction_all[x_stats_interaction_all$OrfRep == gene_sel, ] - df_int_scores <- interaction_scores[interaction_scores$OrfRep == gene_sel, ] - - p_l[[i]] <- ggplot(x_z_calculations, aes(conc_num_factor, delta_l)) + - geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + - coord_cartesian(ylim = c(-65, 65)) + - geom_errorbar(aes(ymin = 0 - (2 * WT_sd_l), ymax = 0 + (2 * WT_sd_l)), alpha = 0.3) + - ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) + - annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$z_shift_l, 2))) + - scale_color_discrete(guide = FALSE) + - # annotate("text", x = 1, y = 35, label = paste("Avg ZScore =", round(df_int_scores$avg_zscore_l, 2))) + - annotate("text", x = 1, y = 25, label = paste("Z lm Score =", round(df_int_scores$z_lm_l, 2))) + - annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) + - annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) + - annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) + - scale_x_continuous( - name = unique(df$Drug[1]), - breaks = unique(x_z_calculations$conc_num_factor), - labels = unique(as.character(x_z_calculations$conc_num))) + - scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) + - theme_publication() - - p_k[[i]] <- ggplot(x_z_calculations, aes(conc_num_factor, delta_k)) + - geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + - coord_cartesian(ylim = c(-65, 65)) + - geom_errorbar(aes(ymin = 0 - (2 * WT_sd_k), ymax = 0 + (2 * WT_sd_k)), alpha = 0.3) + - ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) + - annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$z_shift_k, 2))) + - scale_color_discrete(guide = FALSE) + - # annotate("text", x = 1, y = 35, label = paste("Avg ZScore =", round(df_int_scores$avg_zscore_k, 2))) + - annotate("text", x = 1, y = 25, label = paste("Z lm Score =", round(df_int_scores$z_lm_k, 2))) + - annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) + - annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) + - annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) + - scale_x_continuous( - name = unique(df$Drug[1]), - breaks = unique(x_z_calculations$conc_num_factor), - labels = unique(as.character(x_z_calculations$conc_num))) + - scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) + - theme_publication() - - p_r[[i]] <- ggplot(x_z_calculations, aes(conc_num_factor, delta_r)) + - geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + - coord_cartesian(ylim = c(-0.65, 0.65)) + - geom_errorbar(aes(ymin = 0 - (2 * WT_sd_r), ymax = 0 + (2 * WT_sd_r)), alpha = 0.3) + - ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) + - annotate("text", x = 1, y = 0.45, label = paste("ZShift =", round(df_int_scores$z_shift_r, 2))) + - scale_color_discrete(guide = FALSE) + - # annotate("text", x = 1, y = 0.35, label = paste("Avg ZScore =", round(df_int_scores$avg_zscore_r, 2))) + - annotate("text", x = 1, y = 0.25, label = paste("Z lm Score =", round(df_int_scores$z_lm_r, 2))) + - annotate("text", x = 1, y = -0.25, label = paste("NG =", df_int_scores$NG)) + - annotate("text", x = 1, y = -0.35, label = paste("DB =", df_int_scores$DB)) + - annotate("text", x = 1, y = -0.45, label = paste("SM =", df_int_scores$SM)) + - scale_x_continuous( - name = unique(df$Drug[1]), - breaks = unique(x_z_calculations$conc_num_factor), - labels = unique(as.character(x_z_calculations$conc_num))) + - scale_y_continuous(breaks = c(-0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6)) + - theme_publication() - - p_auc[[i]] <- ggplot(x_z_calculations, aes(conc_num_factor, delta_auc)) + - geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + - coord_cartesian(ylim = c(-6500, 6500)) + - geom_errorbar(aes(ymin = 0 - (2 * WT_sd_auc), ymax = 0 + (2 * WT_sd_auc)), alpha = 0.3) + - ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) + - annotate("text", x = 1, y = 4500, label = paste("ZShift =", round(df_int_scores$z_shift_auc, 2))) + - scale_color_discrete(guide = FALSE) + - # annotate("text", x = 1, y = 3500, label = paste("Avg ZScore =", round(df_int_scores$avg_zscore_auc, 2))) + - annotate("text", x = 1, y = 2500, label = paste("Z lm Score =", round(df_int_scores$z_lm_auc, 2))) + - annotate("text", x = 1, y = -2500, label = paste("NG =", df_int_scores$NG)) + - annotate("text", x = 1, y = -3500, label = paste("DB =", df_int_scores$DB)) + - annotate("text", x = 1, y = -4500, label = paste("SM =", df_int_scores$SM)) + - scale_x_continuous( - name = unique(df$Drug[1]), - breaks = unique(x_z_calculations$conc_num_factor), - labels = unique(as.character(x_z_calculations$conc_num))) + - scale_y_continuous(breaks = c(-6000, -5000, -4000, -3000, -2000, -1000, 0, 1000, 2000, 3000, 4000, 5000, 6000)) + - theme_publication() - - if (i == 1) { - x_stats_interaction_all_final <- x_z_calculations - } - if (i > 1) { - x_stats_interaction_all_final <- rbind(x_stats_interaction_all_final, x_z_calculations) - } - } - print("Pass Int ggplot loop") - write.csv(x_stats_interaction_all_final, file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE) - - blank <- ggplot(df2_rf) + geom_blank() - - pdf(file.path(out_dir, "interaction_plots.pdf"), width = 16, height = 16, onefile = TRUE) - - x_stats_df2_rf <- ddply( - df2_rf, - c("conc_num", "conc_num_factor"), - summarise, - mean_l = mean(l, na.rm = TRUE), - median_l = median(l, na.rm = TRUE), - max_l = max(l, na.rm = TRUE), - min_l = min(l, na.rm = TRUE), - sd_l = sd(l, na.rm = TRUE), - mean_k = mean(K, na.rm = TRUE), - median_k = median(K, na.rm = TRUE), - max_k = max(K, na.rm = TRUE), - min_k = min(K, na.rm = TRUE), - sd_k = sd(K, na.rm = TRUE), - mean_r = mean(r, na.rm = TRUE), - median_r = median(r, na.rm = TRUE), - max_r = max(r, na.rm = TRUE), - min_r = min(r, na.rm = TRUE), - sd_r = sd(r, na.rm = TRUE), - mean_auc = mean(AUC, na.rm = TRUE), - median_auc = median(AUC, na.rm = TRUE), - max_auc = max(AUC, na.rm = TRUE), - min_auc = min(AUC, na.rm = TRUE), - sd_auc = sd(AUC, na.rm = TRUE), - NG = sum(NG, na.rm = TRUE), - DB = sum(DB, na.rm = TRUE), - SM = sum(SM, na.rm = TRUE) - ) - - l_stats <- ggplot(df2_rf, aes(conc_num_factor, l)) + - geom_point(position = "jitter", size = 1) + - stat_summary( - fun = mean, - fun.min = function(x) mean(x) - sd(x), - fun.max = function(x) mean(x) + sd(x), - geom = "errorbar", color = "red") + - stat_summary(fun = mean, geom = "point", color = "red") + - scale_x_continuous( - name = unique(df$Drug[1]), - breaks = unique(df2_rf$conc_num_factor), - labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + - coord_cartesian(ylim = c(0, 160)) + - annotate("text", x = -0.25, y = 10, label = "NG") + - annotate("text", x = -0.25, y = 5, label = "DB") + - annotate("text", x = -0.25, y = 0, label = "SM") + - annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 10, label = x_stats_df2_rf$NG) + - annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 5, label = x_stats_df2_rf$DB) + - annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 0, label = x_stats_df2_rf$SM) + - theme_publication() - - k_stats <- ggplot(df2_rf, aes(conc_num_factor, K)) + - geom_point(position = "jitter", size = 1) + - stat_summary( - fun = mean, - fun.min = function(x) mean(x) - sd(x), - fun.max = function(x) mean(x) + sd(x), - geom = "errorbar", color = "red") + - stat_summary(fun = mean, geom = "point", color = "red") + - scale_x_continuous( - name = unique(df$Drug[1]), - breaks = unique(df2_rf$conc_num_factor), - labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + - coord_cartesian(ylim = c(-20, 160)) + - annotate("text", x = -0.25, y = -5, label = "NG") + - annotate("text", x = -0.25, y = -12.5, label = "DB") + - annotate("text", x = -0.25, y = -20, label = "SM") + - annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = -5, label = x_stats_df2_rf$NG) + - annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = -12.5, label = x_stats_df2_rf$DB) + - annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = -20, label = x_stats_df2_rf$SM) + - theme_publication() - - r_stats <- ggplot(df2_rf, aes(conc_num_factor, r)) + - geom_point(position = "jitter", size = 1) + - stat_summary( - fun = mean, - fun.min = function(x) mean(x) - sd(x), - fun.max = function(x) mean(x) + sd(x), - geom = "errorbar", color = "red") + - stat_summary(fun = mean, geom = "point", color = "red") + - scale_x_continuous( - name = unique(df$Drug[1]), - breaks = unique(df2_rf$conc_num_factor), - labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + - coord_cartesian(ylim = c(0, 1)) + - annotate("text", x = -0.25, y = .9, label = "NG") + - annotate("text", x = -0.25, y = .8, label = "DB") + - annotate("text", x = -0.25, y = .7, label = "SM") + - annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = .9, label = x_stats_df2_rf$NG) + - annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = .8, label = x_stats_df2_rf$DB) + - annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = .7, label = x_stats_df2_rf$SM) + - theme_publication() - - auc_stats <- ggplot(df2_rf, aes(conc_num_factor, AUC)) + - geom_point(position = "jitter", size = 1) + - stat_summary( - fun = mean, - fun.min = function(x) mean(x) - sd(x), - fun.max = function(x) mean(x) + sd(x), - geom = "errorbar", color = "red") + - stat_summary(fun = mean, geom = "point", color = "red") + - scale_x_continuous( - name = unique(df$Drug[1]), - breaks = unique(df2_rf$conc_num_factor), - labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) + - coord_cartesian(ylim = c(0, 12500)) + - annotate("text", x = -0.25, y = 11000, label = "NG") + - annotate("text", x = -0.25, y = 10000, label = "DB") + - annotate("text", x = -0.25, y = 9000, label = "SM") + - annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 11000, label = x_stats_df2_rf$NG) + - annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 10000, label = x_stats_df2_rf$DB) + - annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 9000, label = x_stats_df2_rf$SM) + - theme_publication() - - l_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), l)) + - geom_boxplot() + - scale_x_discrete( - name = unique(df$Drug[1]), - breaks = unique(df2_rf$conc_num_factor), - labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + - coord_cartesian(ylim = c(0, 160)) + - theme_publication() - - k_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), K)) + - geom_boxplot() + - scale_x_discrete( - name = unique(df$Drug[1]), - breaks = unique(df2_rf$conc_num_factor), - labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + - coord_cartesian(ylim = c(0, 130)) + - theme_publication() - - r_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), r)) + - geom_boxplot() + - scale_x_discrete( - name = unique(df$Drug[1]), - breaks = unique(df2_rf$conc_num_factor), - labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + - coord_cartesian(ylim = c(0, 1)) + - theme_publication() - - auc_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), AUC)) + - geom_boxplot() + - scale_x_discrete( - name = unique(df$Drug[1]), - breaks = unique(df2_rf$conc_num_factor), - labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) + - coord_cartesian(ylim = c(0, 12500)) + - theme_publication() - - grid.arrange(l_stats, k_stats, r_stats, auc_stats, ncol = 2, nrow = 2) - grid.arrange(l_stats_box, k_stats_box, r_stats_box, auc_stats_box, ncol = 2, nrow = 2) - - # Plot the references - # grid.arrange(p3, p3_k, p3_r, p4, p4_k, p4_r, p5, p5_k, p5_r, p6, p6_k, p6_r, ncol = 3, nrow = 4) - # grid.arrange(p5, p5_k, p5_r, p6, p6_k, p6_r, ncol = 3, nrow = 2) - - # Loop for grid arrange 4x3 - j <- rep(1, ((num_genes) / 3) - 1) - for (n in 1:length(j)) { - j[n + 1] <- n * 3 + 1 - } - - # Loop for printing each plot - num <- 0 - for (m in 1:(round((num_genes) / 3) - 1)) { - num <- j[m] - grid.arrange(p_l[[num]], p_k[[num]], p_r[[num]], p_auc[[num]], - p_l[[num + 1]], p_k[[num + 1]], p_r[[num + 1]], p_auc[[num + 1]], - p_l[[num + 2]], p_k[[num + 2]], p_r[[num + 2]], p_auc[[num + 2]], ncol = 4, nrow = 3) - # grid.arrange(p_l[[364]], p_k[[364]], p_r[[364]], - # p_l[[365]], p_k[[365]], p_r[[365]], p_l[[366]], p_k[[366]], p_r[[366]], ncol = 3, nrow = 3) - # p1[[num + 3]], p_k[[num + 3]], p_r[[num + 3]], p1[[num + 4]], p_k[[num + 4]], p_r[[num + 4]] - } - - if (num_genes != (num + 2)) { - total_num <- num_genes - (num + 2) - if (total_num == 5) { - grid.arrange(p_l[[num + 3]], p_k[[num + 3]], p_r[[num + 3]], p_auc[[num + 3]], - p_l[[num + 4]], p_k[[num + 4]], p_r[[num + 4]], p_auc[[num + 4]], - p_l[[num + 5]], p_k[[num + 5]], p_r[[num + 5]], p_auc[[num + 5]], ncol = 4, nrow = 3) - - grid.arrange(p_l[[num + 6]], p_k[[num + 6]], p_r[[num + 6]], p_auc[[num + 6]], - p_l[[num + 7]], p_k[[num + 7]], p_r[[num + 7]], p_auc[[num + 7]], - blank, blank, blank, blank, ncol = 4, nrow = 3) - } - if (total_num == 4) { - grid.arrange(p_l[[num + 3]], p_k[[num + 3]], p_r[[num + 3]], p_auc[[num + 3]], - p_l[[num + 4]], p_k[[num + 4]], p_r[[num + 4]], p_auc[[num + 4]], - p_l[[num + 5]], p_k[[num + 5]], p_r[[num + 5]], p_auc[[num + 5]], ncol = 4, nrow = 3) - - grid.arrange(p_l[[num + 6]], p_k[[num + 6]], p_r[[num + 6]], p_auc[[num + 6]], - blank, blank, blank, blank, blank, blank, blank, blank, ncol = 4, nrow = 3) - } - if (total_num == 3) { - grid.arrange(p_l[[num + 3]], p_k[[num + 3]], p_r[[num + 3]], p_auc[[num + 3]], - p_l[[num + 4]], p_k[[num + 4]], p_r[[num + 4]], p_auc[[num + 4]], - p_l[[num + 5]], p_k[[num + 5]], p_r[[num + 5]], p_auc[[num + 5]], ncol = 4, nrow = 3) - # grid.arrange(p_l[[num + 6]], p_k[[num + 6]], p_r[[num + 6]], - # p_l[[num + 7]], p_k[[num + 7]], p_r[[num + 7]], blank, blank, blank, ncol = 3, nrow = 3) - } - if (total_num == 2) { - grid.arrange(p_l[[num + 3]], p_k[[num + 3]], p_r[[num + 3]], p_auc[[num + 3]], - p_l[[num + 4]], p_k[[num + 4]], p_r[[num + 4]], p_auc[[num + 4]], - blank, blank, blank, blank, ncol = 4, nrow = 3) - # grid.arrange(p_l[[num + 6]], p_k[[num + 6]], p_r[[num + 6]], - # p_l[[num + 7]], p_k[[num + 7]], p_r[[num + 7]], blank, blank, blank, ncol = 3, nrow = 3) - } - if (total_num == 1) { - grid.arrange(p_l[[num + 3]], p_k[[num + 3]], p_r[[num + 3]], p_auc[[num + 3]], - blank, blank, blank, blank, blank, blank, blank, blank, ncol = 4, nrow = 3) - # grid.arrange(p_l[[num + 6]], p_k[[num + 6]], p_r[[num + 6]], - # p_l[[num + 7]], p_k[[num + 7]], p_r[[num + 7]], blank, blank, blank, ncol = 3, nrow = 3) - } - } - - invisible(dev.off()) - - pdf(file.path(out_dir, "rf_interaction_plots.pdf"), width = 16, height = 16, onefile = TRUE) - - x_stats_df2_rf <- ddply( - df2_rf, - c("conc_num", "conc_num_factor"), - summarise, - mean_l = mean(l, na.rm = TRUE), - median_l = median(l, na.rm = TRUE), - max_l = max(l, na.rm = TRUE), - min_l = min(l, na.rm = TRUE), - sd_l = sd(l, na.rm = TRUE), - mean_k = mean(K, na.rm = TRUE), - median_k = median(K, na.rm = TRUE), - max_k = max(K, na.rm = TRUE), - min_k = min(K, na.rm = TRUE), - sd_k = sd(K, na.rm = TRUE), - mean_r = mean(r, na.rm = TRUE), - median_r = median(r, na.rm = TRUE), - max_r = max(r, na.rm = TRUE), - min_r = min(r, na.rm = TRUE), - sd_r = sd(r, na.rm = TRUE), - mean_auc = mean(AUC, na.rm = TRUE), - median_auc = median(AUC, na.rm = TRUE), - max_auc = max(AUC, na.rm = TRUE), - min_auc = min(AUC, na.rm = TRUE), - sd_auc = sd(AUC, na.rm = TRUE), - NG = sum(NG, na.rm = TRUE), - DB = sum(DB, na.rm = TRUE), - SM = sum(SM, na.rm = TRUE) - ) - - l_stats <- ggplot(df2_rf, aes(conc_num_factor, l)) + - geom_point(position = "jitter", size = 1) + - stat_summary( - fun = mean, - fun.min = function(x) mean(x) - sd(x), - fun.max = function(x) mean(x) + sd(x), - geom = "errorbar", color = "red") + - stat_summary(fun = mean, geom = "point", color = "red") + - scale_x_continuous( - name = unique(df$Drug[1]), - breaks = unique(df2_rf$conc_num_factor), - labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + - coord_cartesian(ylim = c(0, 130)) + - annotate("text", x = -0.25, y = 10, label = "NG") + - annotate("text", x = -0.25, y = 5, label = "DB") + - annotate("text", x = -0.25, y = 0, label = "SM") + - annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 10, label = x_stats_df2_rf$NG) + - annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 5, label = x_stats_df2_rf$DB) + - annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 0, label = x_stats_df2_rf$SM) + - theme_publication() - - k_stats <- ggplot(df2_rf, aes(conc_num_factor, K)) + - geom_point(position = "jitter", size = 1) + - stat_summary( - fun = mean, - fun.min = function(x) mean(x) - sd(x), - fun.max = function(x) mean(x) + sd(x), - geom = "errorbar", color = "red") + - stat_summary(fun = mean, geom = "point", color = "red") + - scale_x_continuous( - name = unique(df$Drug[1]), - breaks = unique(df2_rf$conc_num_factor), - labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + - coord_cartesian(ylim = c(-20, 160)) + - annotate("text", x = -0.25, y = -5, label = "NG") + - annotate("text", x = -0.25, y = -12.5, label = "DB") + - annotate("text", x = -0.25, y = -20, label = "SM") + - annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = -5, label = x_stats_df2_rf$NG) + - annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = -12.5, label = x_stats_df2_rf$DB) + - annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = -20, label = x_stats_df2_rf$SM) + - theme_publication() - - r_stats <- ggplot(df2_rf, aes(conc_num_factor, r)) + - geom_point(position = "jitter", size = 1) + - stat_summary( - fun = mean, - fun.min = function(x) mean(x) - sd(x), - fun.max = function(x) mean(x) + sd(x), - geom = "errorbar", color = "red") + - stat_summary(fun = mean, geom = "point", color = "red") + - scale_x_continuous( - name = unique(df$Drug[1]), - breaks = unique(df2_rf$conc_num_factor), - labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + - coord_cartesian(ylim = c(0, 1)) + - annotate("text", x = -0.25, y = .9, label = "NG") + - annotate("text", x = -0.25, y = .8, label = "DB") + - annotate("text", x = -0.25, y = .7, label = "SM") + - annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = .9, label = x_stats_df2_rf$NG) + - annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = .8, label = x_stats_df2_rf$DB) + - annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = .7, label = x_stats_df2_rf$SM) + - theme_publication() - - auc_stats <- ggplot(df2_rf, aes(conc_num_factor, AUC)) + - geom_point(position = "jitter", size = 1) + - stat_summary( - fun = mean, - fun.min = function(x) mean(x) - sd(x), - fun.max = function(x) mean(x) + sd(x), - geom = "errorbar", color = "red") + - stat_summary(fun = mean, geom = "point", color = "red") + - scale_x_continuous( - name = unique(df$Drug[1]), - breaks = unique(df2_rf$conc_num_factor), - labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) + - coord_cartesian(ylim = c(0, 12500)) + - annotate("text", x = -0.25, y = 11000, label = "NG") + - annotate("text", x = -0.25, y = 10000, label = "DB") + - annotate("text", x = -0.25, y = 9000, label = "SM") + - annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 11000, label = x_stats_df2_rf$NG) + - annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 10000, label = x_stats_df2_rf$DB) + - annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 9000, label = x_stats_df2_rf$SM) + - theme_publication() - - l_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), l)) + - geom_boxplot() + - scale_x_discrete( - name = unique(df$Drug[1]), - breaks = unique(df2_rf$conc_num_factor), - labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + - coord_cartesian(ylim = c(0, 130)) + - theme_publication() - - k_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), K)) + - geom_boxplot() + - scale_x_discrete( - name = unique(df$Drug[1]), - breaks = unique(df2_rf$conc_num_factor), - labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + - coord_cartesian(ylim = c(0, 160)) + - theme_publication() - - r_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), r)) + - geom_boxplot() + - scale_x_discrete( - name = unique(df$Drug[1]), - breaks = unique(df2_rf$conc_num_factor), - labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + - coord_cartesian(ylim = c(0, 1)) + - theme_publication() - - auc_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), AUC)) + - geom_boxplot() + - scale_x_discrete( - name = unique(df$Drug[1]), - breaks = unique(df2_rf$conc_num_factor), - labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) + - coord_cartesian(ylim = c(12000, 0)) + - theme_publication() - - grid.arrange(l_stats, k_stats, r_stats, auc_stats, ncol = 2, nrow = 2) - grid.arrange(l_stats_box, k_stats_box, r_stats_box, auc_stats_box, ncol = 2, nrow = 2) - - # Plot the references - # grid.arrange(p3, p3_k, p3_r, p4, p4_k, p4_r, p5, p5_k, p5_r, p6, p6_k, p6_r, ncol = 3, nrow = 4) - # grid.arrange(p5, p5_k, p5_r, p6, p6_k, p6_r, ncol = 3, nrow = 2) - - # Loop for grid arrange 4x3 - j <- rep(1, ((num_genes_rf) / 3) - 1) - for (n in 1:length(j)) { - j[n + 1] <- n * 3 + 1 - } - - # Loop for printing each plot - num <- 0 - for (m in 1:(round((num_genes_rf) / 3) - 1)) { - num <- j[m] - grid.arrange(p_rf_l[[num]], p_rf_k[[num]], p_rf_r[[num]], p_rf_auc[[num]], - p_rf_l[[num + 1]], p_rf_k[[num + 1]], p_rf_r[[num + 1]], p_rf_auc[[num + 1]], - p_rf_l[[num + 2]], p_rf_k[[num + 2]], p_rf_r[[num + 2]], p_rf_auc[[num + 2]], ncol = 4, nrow = 3) - # grid.arrange(p_rf_l[[364]], p_rf_k[[364]], p_rf_r[[364]], - # p_rf_l[[365]], p_rf_k[[365]], p_rf_r[[365]], p_rf_l[[366]], p_rf_k[[366]], p_rf_r[[366]], ncol = 3, nrow = 3) - # p1[[num + 3]], p_rf_k[[num + 3]], p_rf_r[[num + 3]], p1[[num + 4]], p_rf_k[[num + 4]], p_rf_r[[num + 4]] - } - if (num_genes_rf != (num + 2)) { - total_num <- num_genes_rf - (num + 2) - - if (total_num == 5) { - grid.arrange(p_rf_l[[num + 3]], p_rf_k[[num + 3]], p_rf_r[[num + 3]], p_rf_auc[[num + 3]], - p_rf_l[[num + 4]], p_rf_k[[num + 4]], p_rf_r[[num + 4]], p_rf_auc[[num + 4]], - p_rf_l[[num + 5]], p_rf_k[[num + 5]], p_rf_r[[num + 5]], p_rf_auc[[num + 5]], ncol = 4, nrow = 3 - ) - grid.arrange(p_rf_l[[num + 6]], p_rf_k[[num + 6]], p_rf_r[[num + 6]], p_rf_auc[[num + 6]], - p_rf_l[[num + 7]], p_rf_k[[num + 7]], p_rf_r[[num + 7]], p_rf_auc[[num + 7]], - blank, blank, blank, blank, ncol = 4, nrow = 3 - ) - } - - if (total_num == 4) { - grid.arrange(p_rf_l[[num + 3]], p_rf_k[[num + 3]], p_rf_r[[num + 3]], p_rf_auc[[num + 3]], - p_rf_l[[num + 4]], p_rf_k[[num + 4]], p_rf_r[[num + 4]], p_rf_auc[[num + 4]], - p_rf_l[[num + 5]], p_rf_k[[num + 5]], p_rf_r[[num + 5]], p_rf_auc[[num + 5]], ncol = 4, nrow = 3 - ) - grid.arrange(p_rf_l[[num + 6]], p_rf_k[[num + 6]], p_rf_r[[num + 6]], p_rf_auc[[num + 6]], - blank, blank, blank, blank, blank, blank, blank, blank, ncol = 4, nrow = 3 - ) - } - - if (total_num == 3) { - grid.arrange(p_rf_l[[num + 3]], p_rf_k[[num + 3]], p_rf_r[[num + 3]], p_rf_auc[[num + 3]], - p_rf_l[[num + 4]], p_rf_k[[num + 4]], p_rf_r[[num + 4]], p_rf_auc[[num + 4]], - p_rf_l[[num + 5]], p_rf_k[[num + 5]], p_rf_r[[num + 5]], p_rf_auc[[num + 5]], ncol = 4, nrow = 3 - ) - # grid.arrange(p_rf_l[[num + 6]], p_rf_k[[num + 6]], p_rf_r[[num + 6]], - # p_rf_l[[num + 7]], p_rf_k[[num + 7]], p_rf_r[[num + 7]], blank, blank, blank, ncol = 3, nrow = 3) - } - - if (total_num == 2) { - grid.arrange(p_rf_l[[num + 3]], p_rf_k[[num + 3]], p_rf_r[[num + 3]], p_rf_auc[[num + 3]], - p_rf_l[[num + 4]], p_rf_k[[num + 4]], p_rf_r[[num + 4]], p_rf_auc[[num + 4]], - blank, blank, blank, blank, ncol = 4, nrow = 3 - ) - # grid.arrange(p_rf_l[[num + 6]], p_rf_k[[num + 6]], p_rf_r[[num + 6]], - # p_rf_l[[num + 7]], p_rf_k[[num + 7]], p_rf_r[[num + 7]], blank, blank, blank, ncol = 3, nrow = 3) - } - - if (total_num == 1) { - grid.arrange(p_rf_l[[num + 3]], p_rf_k[[num + 3]], p_rf_r[[num + 3]], p_rf_auc[[num + 3]], - blank, blank, blank, blank, blank, blank, blank, blank, ncol = 4, nrow = 3 - ) - # grid.arrange(p_rf_l[[num + 6]], p_rf_k[[num + 6]], p_rf_r[[num + 6]], - # p_rf_l[[num + 7]], p_rf_k[[num + 7]], p_rf_r[[num + 7]], blank, blank, blank, ncol = 3, nrow = 3) - } - } - - invisible(dev.off()) - - # Print rank plots for L and k gene interactions - interaction_scores_adjust_missing <- interaction_scores - interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$avg_zscore_l), ]$avg_zscore_l <- 0.001 - interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$avg_zscore_k), ]$avg_zscore_k <- 0.001 - interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$avg_zscore_r), ]$avg_zscore_r <- 0.001 - interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$avg_zscore_auc), ]$avg_zscore_auc <- 0.001 - - interaction_scores_adjust_missing$l_rank <- NA - interaction_scores_adjust_missing$k_rank <- NA - interaction_scores_adjust_missing$r_rank <- NA - interaction_scores_adjust_missing$auc_rank <- NA - - interaction_scores_adjust_missing$l_rank <- rank(interaction_scores_adjust_missing$avg_zscore_l) - interaction_scores_adjust_missing$k_rank <- rank(interaction_scores_adjust_missing$avg_zscore_k) - interaction_scores_adjust_missing$r_rank <- rank(interaction_scores_adjust_missing$avg_zscore_r) - interaction_scores_adjust_missing$auc_rank <- rank(interaction_scores_adjust_missing$avg_zscore_auc) - - interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$z_lm_l), ]$z_lm_l <- 0.001 - interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$z_lm_k), ]$z_lm_k <- 0.001 - interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$z_lm_r), ]$z_lm_r <- 0.001 - interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$z_lm_auc), ]$z_lm_auc <- 0.001 - - interaction_scores_adjust_missing$l_rank_lm <- NA - interaction_scores_adjust_missing$k_rank_lm <- NA - interaction_scores_adjust_missing$r_rank_lm <- NA - interaction_scores_adjust_missing$auc_rank_lm <- NA - - interaction_scores_adjust_missing$l_rank_lm <- rank(interaction_scores_adjust_missing$z_lm_l) - interaction_scores_adjust_missing$k_rank_lm <- rank(interaction_scores_adjust_missing$z_lm_k) - interaction_scores_adjust_missing$r_rank_lm <- rank(interaction_scores_adjust_missing$z_lm_r) - interaction_scores_adjust_missing$auc_rank_lm <- rank(interaction_scores_adjust_missing$z_lm_auc) - - # Rank plots - rank_l_1sd <- ggplot(interaction_scores_adjust_missing, aes(l_rank, avg_zscore_l)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + - xlab("Rank") + - ylab("Avg Z score L") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + - annotate("text", - x = (dim(interaction_scores_adjust_missing)[1] / 2), - y = 10, - label = paste("Deletion Enhancers =", - dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_l >= 1, ])[1])) + - annotate("text", - x = (dim(interaction_scores_adjust_missing)[1] / 2), - y = -10, - label = paste("Deletion Suppressors =", - dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_l <= -1, ])[1])) + - theme_publication() - - rank_l_2sd <- ggplot(interaction_scores_adjust_missing, aes(l_rank, avg_zscore_l)) + - ggtitle("Average Z score vs. Rank for L above 2sd") + - xlab("Rank") + - ylab("Avg Z score L") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + - annotate("text", - x = (dim(interaction_scores_adjust_missing)[1] / 2), - y = 10, - label = paste("Deletion Enhancers =", - dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_l >= 2, ])[1])) + - annotate("text", - x = (dim(interaction_scores_adjust_missing)[1] / 2), - y = -10, label = paste("Deletion Suppressors =", - dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_l <= -2, ])[1])) + - theme_publication() - - rank_l_3sd <- ggplot(interaction_scores_adjust_missing, aes(l_rank, avg_zscore_l)) + - ggtitle("Average Z score vs. Rank for L above 3sd") + - xlab("Rank") + - ylab("Avg Z score L") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + - annotate("text", - x = (dim(interaction_scores_adjust_missing)[1] / 2), - y = 10, - label = paste("Deletion Enhancers =", - dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_l >= 3, ])[1])) + - annotate("text", - x = (dim(interaction_scores_adjust_missing)[1] / 2), - y = -10, - label = paste("Deletion Suppressors =", - dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_l <= -3, ])[1])) + - theme_publication() - - rank_k_1sd <- ggplot(interaction_scores_adjust_missing, aes(k_rank, avg_zscore_k)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + - xlab("Rank") + - ylab("Avg Z score K") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + - annotate("text", - x = (dim(interaction_scores_adjust_missing)[1] / 2), - y = -10, - label = paste("Deletion Enhancers =", - dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_k <= -1, ])[1])) + - annotate("text", - x = (dim(interaction_scores_adjust_missing)[1] / 2), - y = 10, - label = paste("Deletion Suppressors =", - dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_k >= 1, ])[1])) + - theme_publication() - - rank_k_2sd <- ggplot(interaction_scores_adjust_missing, aes(k_rank, avg_zscore_k)) + - ggtitle("Average Z score vs. Rank for K above 2sd") + - xlab("Rank") + - ylab("Avg Z score K") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + - annotate("text", - x = (dim(interaction_scores_adjust_missing)[1] / 2), - y = -10, - label = paste("Deletion Enhancers =", - dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_k <= -2, ])[1])) + - annotate("text", - x = (dim(interaction_scores_adjust_missing)[1] / 2), - y = 10, - label = paste("Deletion Suppressors =", - dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_k >= 2, ])[1])) + - theme_publication() - - rank_k_3sd <- ggplot(interaction_scores_adjust_missing, aes(k_rank, avg_zscore_k)) + - ggtitle("Average Z score vs. Rank for K above 3sd") + - xlab("Rank") + - ylab("Avg Z score K") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + - annotate("text", - x = (dim(interaction_scores_adjust_missing)[1] / 2), - y = -10, - label = paste("Deletion Enhancers =", - dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_k <= -3, ])[1])) + - annotate("text", - x = (dim(interaction_scores_adjust_missing)[1] / 2), - y = 10, - label = paste("Deletion Suppressors =", - dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_k >= 3, ])[1])) + - theme_publication() - - rank_l_1sd_notext <- ggplot(interaction_scores_adjust_missing, aes(l_rank, avg_zscore_l)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + - xlab("Rank") + - ylab("Avg Z score L") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + - theme_publication() - - rank_l_2sd_notext <- ggplot(interaction_scores_adjust_missing, aes(l_rank, avg_zscore_l)) + - ggtitle("Average Z score vs. Rank for L above 2sd") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + - theme_publication() - - rank_l_3sd_notext <- ggplot(interaction_scores_adjust_missing, aes(l_rank, avg_zscore_l)) + - ggtitle("Average Z score vs. Rank for L above 3sd") + - xlab("Rank") + - ylab("Avg Z score L") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + - theme_publication() - - rank_k_1sd_notext <- ggplot(interaction_scores_adjust_missing, aes(k_rank, avg_zscore_k)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + - xlab("Rank") + - ylab("Avg Z score K") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + - theme_publication() - - rank_k_2sd_notext <- ggplot(interaction_scores_adjust_missing, aes(k_rank, avg_zscore_k)) + - ggtitle("Average Z score vs. Rank for K above 2sd") + - xlab("Rank") + - ylab("Avg Z score K") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + - theme_publication() - - rank_k_3sd_notext <- ggplot(interaction_scores_adjust_missing, aes(k_rank, avg_zscore_k)) + - ggtitle("Average Z score vs. Rank for K above 3sd") + - xlab("Rank") + - ylab("Avg Z score K") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + - theme_publication() - - pdf(file.path(out_dir, "rank_plots.pdf"), width = 18, height = 12, onefile = TRUE) - - grid.arrange( - rank_l_1sd, - rank_l_2sd, - rank_l_3sd, - rank_k_1sd, - rank_k_2sd, - rank_k_3sd, - ncol = 3, - nrow = 2 - ) - - grid.arrange( - rank_l_1sd_notext, - rank_l_2sd_notext, - rank_l_3sd_notext, - rank_k_1sd_notext, - rank_k_2sd_notext, - rank_k_3sd_notext, - ncol = 3, - nrow = 2 - ) - - invisible(dev.off()) - - rank_l_1sd_lm <- ggplot(interaction_scores_adjust_missing, aes(l_rank_lm, z_lm_l)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + - xlab("Rank") + - ylab("Int Z score L") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + - annotate("text", - x = (dim(interaction_scores_adjust_missing)[1] / 2), - y = 10, - label = paste("Deletion Enhancers =", - dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_l >= 1, ])[1])) + - annotate("text", - x = (dim(interaction_scores_adjust_missing)[1] / 2), - y = -10, - label = paste("Deletion Suppressors =", - dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_l <= -1, ])[1])) + - theme_publication() - - rank_l_2sd_lm <- ggplot(interaction_scores_adjust_missing, aes(l_rank_lm, z_lm_l)) + - ggtitle("Interaction Z score vs. Rank for L above 2sd") + - xlab("Rank") + - ylab("Int Z score L") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + - annotate("text", - x = (dim(interaction_scores_adjust_missing)[1] / 2), - y = 10, - label = paste("Deletion Enhancers =", - dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_l >= 2, ])[1])) + - annotate("text", - x = (dim(interaction_scores_adjust_missing)[1] / 2), - y = -10, - label = paste("Deletion Suppressors =", - dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_l <= -2, ])[1])) + - theme_publication() - - rank_l_3sd_lm <- ggplot(interaction_scores_adjust_missing, aes(l_rank_lm, z_lm_l)) + - ggtitle("Interaction Z score vs. Rank for L above 3sd") + - xlab("Rank") + - ylab("Int Z score L") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + - annotate("text", - x = (dim(interaction_scores_adjust_missing)[1] / 2), - y = 10, - label = paste("Deletion Enhancers =", - dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_l >= 3, ])[1])) + - annotate("text", - x = (dim(interaction_scores_adjust_missing)[1] / 2), - y = -10, - label = paste("Deletion Suppressors =", - dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_l <= -3, ])[1])) + - theme_publication() - - rank_k_1sd_lm <- ggplot(interaction_scores_adjust_missing, aes(k_rank_lm, z_lm_k)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + - xlab("Rank") + - ylab("Int Z score K") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + - annotate("text", - x = (dim(interaction_scores_adjust_missing)[1] / 2), - y = -10, - label = paste("Deletion Enhancers =", - dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_k <= -1, ])[1])) + - annotate("text", - x = (dim(interaction_scores_adjust_missing)[1] / 2), - y = 10, - label = paste("Deletion Suppressors =", - dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_k >= 1, ])[1])) + - theme_publication() - - rank_k_2sd_lm <- ggplot(interaction_scores_adjust_missing, aes(k_rank_lm, z_lm_k)) + - ggtitle("Interaction Z score vs. Rank for K above 2sd") + - xlab("Rank") + - ylab("Int Z score K") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + - annotate("text", - x = (dim(interaction_scores_adjust_missing)[1] / 2), - y = -10, - label = paste("Deletion Enhancers =", - dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_k <= -2, ])[1])) + - annotate("text", - x = (dim(interaction_scores_adjust_missing)[1] / 2), - y = 10, - label = paste("Deletion Suppressors =", - dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_k >= 2, ])[1])) + - theme_publication() - - rank_k_3sd_lm <- ggplot(interaction_scores_adjust_missing, aes(k_rank_lm, z_lm_k)) + - ggtitle("Interaction Z score vs. Rank for K above 3sd") + - xlab("Rank") + - ylab("Int Z score K") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + - annotate("text", - x = (dim(interaction_scores_adjust_missing)[1] / 2), - y = -10, - label = paste("Deletion Enhancers =", - dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_k <= -3, ])[1])) + - annotate("text", - x = (dim(interaction_scores_adjust_missing)[1] / 2), - y = 10, - label = paste("Deletion Suppressors =", - dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_k >= 3, ])[1])) + - theme_publication() - - rank_l_1sd_notext_lm <- ggplot(interaction_scores_adjust_missing, aes(l_rank_lm, z_lm_l)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + - xlab("Rank") + - ylab("Int Z score L") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + - theme_publication() - - rank_l_2sd_notext_lm <- ggplot(interaction_scores_adjust_missing, aes(l_rank_lm, z_lm_l)) + - ggtitle("Interaction Z score vs. Rank for L above 2sd") + - xlab("Rank") + - ylab("Int Z score L") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + - theme_publication() - - rank_l_3sd_notext_lm <- ggplot(interaction_scores_adjust_missing, aes(l_rank_lm, z_lm_l)) + - ggtitle("Interaction Z score vs. Rank for L above 3sd") + - xlab("Rank") + - ylab("Int Z score L") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + - theme_publication() - - rank_k_1sd_notext_lm <- ggplot(interaction_scores_adjust_missing, aes(k_rank_lm, z_lm_k)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + - xlab("Rank") + - ylab("Int Z score K") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + - theme_publication() - - rank_k_2sd_notext_lm <- ggplot(interaction_scores_adjust_missing, aes(k_rank_lm, z_lm_k)) + - ggtitle("Interaction Z score vs. Rank for K above 2sd") + - xlab("Rank") + - ylab("Int Z score K") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + - theme_publication() - - rank_k_3sd_notext_lm <- ggplot(interaction_scores_adjust_missing, aes(k_rank_lm, z_lm_k)) + - ggtitle("Interaction Z score vs. Rank for K above 3sd") + - xlab("Rank") + - ylab("Int Z score K") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + - theme_publication() - - pdf(file.path(out_dir, "rank_plots_lm.pdf"), width = 18, height = 12, onefile = TRUE) - - grid.arrange( - rank_l_1sd_lm, - rank_l_2sd_lm, - rank_l_3sd_lm, - rank_k_1sd_lm, - rank_k_2sd_lm, - rank_k_3sd_lm, - ncol = 3, nrow = 2 - ) - - grid.arrange( - rank_l_1sd_notext_lm, - rank_l_2sd_notext_lm, - rank_l_3sd_notext_lm, - rank_k_1sd_notext_lm, - rank_k_2sd_notext_lm, - rank_k_3sd_notext_lm, - ncol = 3, nrow = 2 - ) - - invisible(dev.off()) - - df_na_rm <- interaction_scores[!is.na(interaction_scores$z_lm_l) | !is.na(interaction_scores$avg_zscore_l), ] - - # Find overlaps - df_na_rm$Overlap <- "No Effect" - try(df_na_rm[df_na_rm$z_lm_l >= 2 & df_na_rm$avg_zscore_l >= 2, ]$Overlap <- "Deletion Enhancer Both") - try(df_na_rm[df_na_rm$z_lm_l <= -2 & df_na_rm$avg_zscore_l <= -2, ]$Overlap <- "Deletion Suppressor Both") - try(df_na_rm[df_na_rm$z_lm_l >= 2 & df_na_rm$avg_zscore_l <= 2, ]$Overlap <- "Deletion Enhancer lm only") - try(df_na_rm[df_na_rm$z_lm_l <= 2 & df_na_rm$avg_zscore_l >= 2, ]$Overlap <- "Deletion Enhancer Avg Zscore only") - try(df_na_rm[df_na_rm$z_lm_l <= -2 & df_na_rm$avg_zscore_l >= -2, ]$Overlap <- "Deletion Suppressor lm only") - try(df_na_rm[df_na_rm$z_lm_l >= -2 & df_na_rm$avg_zscore_l <= -2, ]$Overlap <- "Deletion Suppressor Avg Zscore only") - try(df_na_rm[df_na_rm$z_lm_l >= 2 & df_na_rm$avg_zscore_l <= -2, ]$Overlap <- "Deletion Enhancer lm, Deletion Suppressor Avg Z score") - try(df_na_rm[df_na_rm$z_lm_l <= -2 & df_na_rm$avg_zscore_l >= 2, ]$Overlap <- "Deletion Suppressor lm, Deletion Enhancer Avg Z score") - - # Get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 - get_lm_l <- lm(df_na_rm$z_lm_l ~ df_na_rm$avg_zscore_l) - l_lm <- summary(get_lm_l) - - get_lm_k <- lm(df_na_rm$z_lm_k ~ df_na_rm$avg_zscore_k) - k_lm <- summary(get_lm_k) - - get_lm_r <- lm(df_na_rm$z_lm_r ~ df_na_rm$avg_zscore_r) - r_lm <- summary(get_lm_r) - - get_lm_auc <- lm(df_na_rm$z_lm_auc ~ df_na_rm$avg_zscore_auc) - auc_lm <- summary(get_lm_auc) - - pdf(file.path(out_dir, "avg_zscore_vs_lm_na_rm.pdf"), width = 16, height = 12, onefile = TRUE) - - print(ggplot(df_na_rm, aes(avg_zscore_l, z_lm_l)) + - geom_point(aes(color = Overlap), shape = 3) + - geom_smooth(method = "lm", color = 1) + - ggtitle("Avg Zscore vs lm L") + - geom_rect( - aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), - color = "grey20", - size = 0.25, - alpha = 0.1, - inherit.aes = FALSE, - fill = NA) + - annotate("text", - x = 0, y = 0, label = paste("R-squared = ", round(l_lm$r.squared, 2))) + - theme_publication_legend_right() - ) - - print(ggplot(df_na_rm, aes(avg_zscore_k, z_lm_k)) + - geom_point(aes(color = Overlap), shape = 3) + - geom_smooth(method = "lm", color = 1) + - ggtitle("Avg Zscore vs lm K") + - geom_rect( - aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), - color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + - annotate("text", - x = 0, y = 0, label = paste("R-squared = ", round(k_lm$r.squared, 2))) + - theme_publication_legend_right() - ) - - print(ggplot(df_na_rm, aes(avg_zscore_r, z_lm_r)) + - geom_point(aes(color = Overlap), shape = 3) + - geom_smooth(method = "lm", color = 1) + - ggtitle("Avg Zscore vs lm r") + - geom_rect( - aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), - color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + - annotate("text", - x = 0, y = 0, label = paste("R-squared = ", round(r_lm$r.squared, 2))) + - theme_publication_legend_right() - ) - - print(ggplot(df_na_rm, aes(avg_zscore_auc, z_lm_auc)) + - geom_point(aes(color = Overlap), shape = 3) + - geom_smooth(method = "lm", color = 1) + - ggtitle("Avg Zscore vs lm AUC") + - geom_rect( - aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), - color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + - annotate("text", - x = 0, y = 0, label = paste("R-squared = ", round(auc_lm$r.squared, 2))) + - theme_publication_legend_right() - ) - - invisible(dev.off()) - - lm_v_zscore_l <- - ggplot(df_na_rm, aes(avg_zscore_l, z_lm_l, ORF = OrfRep, Gene = Gene, NG = NG, SM = SM, DB = DB)) + - geom_point(aes(color = Overlap), shape = 3) + - geom_smooth(method = "lm", color = 1) + ggtitle("Avg Zscore vs lm L") + - geom_rect( - aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), - color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA - ) + - annotate("text", - x = 0, y = 0, label = paste("R-squared = ", round(l_lm$r.squared, 2))) + - theme_publication_legend_right() - - pgg <- ggplotly(lm_v_zscore_l) - plotly_path <- file.path(out_dir, "avg_zscore_vs_lm_na_rm.html") - saveWidget(pgg, file = plotly_path, selfcontained = TRUE) - - df_na_rm$l_rank <- rank(df_na_rm$avg_zscore_l) - df_na_rm$k_rank <- rank(df_na_rm$avg_zscore_k) - df_na_rm$r_rank <- rank(df_na_rm$avg_zscore_r) - df_na_rm$auc_rank <- rank(df_na_rm$avg_zscore_auc) - df_na_rm$l_rank_lm <- rank(df_na_rm$z_lm_l) - df_na_rm$k_rank_lm <- rank(df_na_rm$z_lm_k) - df_na_rm$r_rank_lm <- rank(df_na_rm$z_lm_r) - df_na_rm$auc_rank_lm <- rank(df_na_rm$z_lm_auc) - - # Get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 - get_lm_l2 <- lm(df_na_rm$l_rank_lm ~ df_na_rm$l_rank) - l_lm2 <- summary(get_lm_l2) - get_lm_k2 <- lm(df_na_rm$k_rank_lm ~ df_na_rm$k_rank) - k_lm2 <- summary(get_lm_k2) - get_lm_r2 <- lm(df_na_rm$r_rank_lm ~ df_na_rm$r_rank) - r_lm2 <- summary(get_lm_r2) - get_lm_auc2 <- lm(df_na_rm$auc_rank_lm ~ df_na_rm$auc_rank) - auc_lm2 <- summary(get_lm_auc2) - num_genes_na_rm2 <- (dim(df_na_rm)[1]) / 2 - - pdf(file.path(out_dir, "avg_zscore_vs_lm_ranked_na_rm.pdf"), width = 16, height = 12, onefile = TRUE) - - print( - ggplot(df_na_rm, aes(l_rank, l_rank_lm)) + - geom_point(aes(color = Overlap), shape = 3) + - geom_smooth(method = "lm", color = 1) + - ggtitle("Rank Avg Zscore vs lm L") + - annotate("text", - x = num_genes_na_rm2, - y = num_genes_na_rm2, - label = paste("R-squared = ", round(l_lm2$r.squared, 2))) + - theme_publication_legend_right() - ) - - print( - ggplot(df_na_rm, aes(k_rank, k_rank_lm)) + - geom_point(aes(color = Overlap), shape = 3) + - geom_smooth(method = "lm", color = 1) + - ggtitle("Rank Avg Zscore vs lm K") + - annotate("text", - x = num_genes_na_rm2, - y = num_genes_na_rm2, - label = paste("R-squared = ", round(k_lm2$r.squared, 2))) + - theme_publication_legend_right() - ) - - print( - ggplot(df_na_rm, aes(r_rank, r_rank_lm)) + - geom_point(aes(color = Overlap), shape = 3) + - geom_smooth(method = "lm", color = 1) + - ggtitle("Rank Avg Zscore vs lm r") + - annotate("text", - x = num_genes_na_rm2, - y = num_genes_na_rm2, - label = paste("R-squared = ", round(r_lm2$r.squared, 2))) + - theme_publication_legend_right() - ) - - print( - ggplot(df_na_rm, aes(auc_rank, auc_rank_lm)) + - geom_point(aes(color = Overlap), shape = 3) + - geom_smooth(method = "lm", color = 1) + - ggtitle("Rank of Avg Zscore vs lm AUC") + - annotate("text", - x = num_genes_na_rm2, - y = num_genes_na_rm2, - label = paste("R-squared = ", round(auc_lm2$r.squared, 2))) + - theme_publication_legend_right() - ) - - invisible(dev.off()) - - rank_l_1sd <- ggplot(df_na_rm, aes(l_rank, avg_zscore_l)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + - xlab("Rank") + - ylab("Avg Z score L") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + - annotate("text", - x = (dim(df_na_rm)[1] / 2), - y = 10, - label = paste("Deletion Enhancers =", - dim(df_na_rm[df_na_rm$avg_zscore_l >= 1, ])[1])) + - annotate("text", - x = (dim(df_na_rm)[1] / 2), - y = -10, - label = paste("Deletion Suppressors =", - dim(df_na_rm[df_na_rm$avg_zscore_l <= -1, ])[1])) + - theme_publication() - - rank_l_2sd <- ggplot(df_na_rm, aes(l_rank, avg_zscore_l)) + - ggtitle("Average Z score vs. Rank for L above 2sd") + - xlab("Rank") + - ylab("Avg Z score L") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + - annotate("text", - x = (dim(df_na_rm)[1] / 2), - y = 10, - label = paste("Deletion Enhancers =", - dim(df_na_rm[df_na_rm$avg_zscore_l >= 2, ])[1])) + - annotate("text", - x = (dim(df_na_rm)[1] / 2), - y = -10, - label = paste("Deletion Suppressors =", - dim(df_na_rm[df_na_rm$avg_zscore_l <= -2, ])[1])) + - theme_publication() - - rank_l_3sd <- ggplot(df_na_rm, aes(l_rank, avg_zscore_l)) + - ggtitle("Average Z score vs. Rank for L above 3sd") + - xlab("Rank") + - ylab("Avg Z score L") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + - annotate("text", - x = (dim(df_na_rm)[1] / 2), - y = 10, - label = paste("Deletion Enhancers =", - dim(df_na_rm[df_na_rm$avg_zscore_l >= 3, ])[1])) + - annotate("text", - x = (dim(df_na_rm)[1] / 2), - y = -10, - label = paste("Deletion Suppressors =", - dim(df_na_rm[df_na_rm$avg_zscore_l <= -3, ])[1])) + - theme_publication() - - rank_k_1sd <- ggplot(df_na_rm, aes(k_rank, avg_zscore_k)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + - xlab("Rank") + - ylab("Avg Z score K") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + - annotate("text", - x = (dim(df_na_rm)[1] / 2), - y = -10, - label = paste("Deletion Enhancers =", - dim(df_na_rm[df_na_rm$avg_zscore_k <= -1, ])[1])) + - annotate("text", - x = (dim(df_na_rm)[1] / 2), - y = 10, - label = paste("Deletion Suppressors =", - dim(df_na_rm[df_na_rm$avg_zscore_k >= 1, ])[1])) + - theme_publication() - - rank_k_2sd <- ggplot(df_na_rm, aes(k_rank, avg_zscore_k)) + - ggtitle("Average Z score vs. Rank for K above 2sd") + - xlab("Rank") + - ylab("Avg Z score K") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + - annotate("text", - x = (dim(df_na_rm)[1] / 2), - y = -10, - label = paste("Deletion Enhancers =", - dim(df_na_rm[df_na_rm$avg_zscore_k <= -2, ])[1])) + - annotate("text", - x = (dim(df_na_rm)[1] / 2), - y = 10, - label = paste("Deletion Suppressors =", - dim(df_na_rm[df_na_rm$avg_zscore_k >= 2, ])[1])) + - theme_publication() - - rank_k_3sd <- ggplot(df_na_rm, aes(k_rank, avg_zscore_k)) + - ggtitle("Average Z score vs. Rank for K above 3sd") + - xlab("Rank") + - ylab("Avg Z score K") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + - annotate("text", - x = (dim(df_na_rm)[1] / 2), - y = -10, - label = paste("Deletion Enhancers =", - dim(df_na_rm[df_na_rm$avg_zscore_k <= -3, ])[1])) + - annotate("text", - x = (dim(df_na_rm)[1] / 2), - y = 10, - label = paste("Deletion Suppressors =", - dim(df_na_rm[df_na_rm$avg_zscore_k >= 3, ])[1])) + - theme_publication() - - rank_l_1sd_notext <- ggplot(df_na_rm, aes(l_rank, avg_zscore_l)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + - xlab("Rank") + - ylab("Avg Z score L") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + - theme_publication() - - rank_l_2sd_notext <- ggplot(df_na_rm, aes(l_rank, avg_zscore_l)) + - ggtitle("Average Z score vs. Rank for L above 2sd") + - xlab("Rank") + - ylab("Avg Z score L") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + - theme_publication() - - rank_l_3sd_notext <- ggplot(df_na_rm, aes(l_rank, avg_zscore_l)) + - ggtitle("Average Z score vs. Rank for L above 3sd") + - xlab("Rank") + - ylab("Avg Z score L") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + - theme_publication() - - rank_k_1sd_notext <- ggplot(df_na_rm, aes(k_rank, avg_zscore_k)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + - xlab("Rank") + - ylab("Avg Z score K") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + - theme_publication() - - rank_k_2sd_notext <- ggplot(df_na_rm, aes(k_rank, avg_zscore_k)) + - ggtitle("Average Z score vs. Rank for K above 2sd") + - xlab("Rank") + - ylab("Avg Z score K") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + - theme_publication() - - rank_k_3sd_notext <- ggplot(df_na_rm, aes(k_rank, avg_zscore_k)) + - ggtitle("Average Z score vs. Rank for K above 3sd") + - xlab("Rank") + - ylab("Avg Z score K") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + - theme_publication() - - pdf(file.path(out_dir, "rank_plots_narm.pdf"), width = 18, height = 12, onefile = TRUE) - - grid.arrange( - rank_l_1sd, - rank_l_2sd, - rank_l_3sd, - rank_k_1sd, - rank_k_2sd, - rank_k_3sd, - ncol = 3, nrow = 2 - ) - - grid.arrange( - rank_l_1sd_notext, - rank_l_2sd_notext, - rank_l_3sd_notext, - rank_k_1sd_notext, - rank_k_2sd_notext, - rank_k_3sd_notext, - ncol = 3, nrow = 2 - ) - - invisible(dev.off()) - - rank_l_1sd_lm <- ggplot(df_na_rm, aes(l_rank_lm, z_lm_l)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + - xlab("Rank") + - ylab("Int Z score L") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + - annotate("text", - x = (dim(df_na_rm)[1] / 2), - y = 10, label = paste("Deletion Enhancers =", - dim(df_na_rm[df_na_rm$z_lm_l >= 1, ])[1])) + - annotate("text", - x = (dim(df_na_rm)[1] / 2), - y = -10, - label = paste("Deletion Suppressors =", - dim(df_na_rm[df_na_rm$z_lm_l <= -1, ])[1])) + - theme_publication() - - rank_l_2sd_lm <- ggplot(df_na_rm, aes(l_rank_lm, z_lm_l)) + - ggtitle("Interaction Z score vs. Rank for L above 2sd") + - xlab("Rank") + - ylab("Int Z score L") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + - annotate("text", - x = (dim(df_na_rm)[1] / 2), - y = 10, - label = paste("Deletion Enhancers =", - dim(df_na_rm[df_na_rm$z_lm_l >= 2, ])[1])) + - annotate("text", - x = (dim(df_na_rm)[1] / 2), - y = -10, - label = paste("Deletion Suppressors =", - dim(df_na_rm[df_na_rm$z_lm_l <= -2, ])[1])) + - theme_publication() - - rank_l_3sd_lm <- ggplot(df_na_rm, aes(l_rank_lm, z_lm_l)) + - ggtitle("Interaction Z score vs. Rank for L above 3sd") + - xlab("Rank") + - ylab("Int Z score L") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + - annotate("text", - x = (dim(df_na_rm)[1] / 2), - y = 10, - label = paste("Deletion Enhancers =", - dim(df_na_rm[df_na_rm$z_lm_l >= 3, ])[1])) + - annotate("text", - x = (dim(df_na_rm)[1] / 2), - y = -10, - label = paste("Deletion Suppressors =", - dim(df_na_rm[df_na_rm$z_lm_l <= -3, ])[1])) + - theme_publication() - - rank_k_1sd_lm <- ggplot(df_na_rm, aes(k_rank_lm, z_lm_k)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + - xlab("Rank") + - ylab("Int Z score K") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + - annotate("text", - x = (dim(df_na_rm)[1] / 2), - y = -10, - label = paste("Deletion Enhancers =", - dim(df_na_rm[df_na_rm$z_lm_k <= -1, ])[1])) + - annotate("text", - x = (dim(df_na_rm)[1] / 2), - y = 10, - label = paste("Deletion Suppressors =", - dim(df_na_rm[df_na_rm$z_lm_k >= 1, ])[1])) + - theme_publication() - - rank_k_2sd_lm <- ggplot(df_na_rm, aes(k_rank_lm, z_lm_k)) + - ggtitle("Interaction Z score vs. Rank for K above 2sd") + - xlab("Rank") + - ylab("Int Z score K") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + - annotate("text", - x = (dim(df_na_rm)[1] / 2), - y = -10, - label = paste("Deletion Enhancers =", - dim(df_na_rm[df_na_rm$z_lm_k <= -2, ])[1])) + - annotate("text", - x = (dim(df_na_rm)[1] / 2), - y = 10, - label = paste("Deletion Suppressors =", - dim(df_na_rm[df_na_rm$z_lm_k >= 2, ])[1])) + - theme_publication() - - rank_k_3sd_lm <- ggplot(df_na_rm, aes(k_rank_lm, z_lm_k)) + - ggtitle("Interaction Z score vs. Rank for K above 3sd") + - xlab("Rank") + - ylab("Int Z score K") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + - annotate("text", - x = (dim(df_na_rm)[1] / 2), - y = -10, label = paste("Deletion Enhancers =", - dim(df_na_rm[df_na_rm$z_lm_k <= -3, ])[1])) + - annotate("text", - x = (dim(df_na_rm)[1] / 2), - y = 10, - label = paste("Deletion Suppressors =", - dim(df_na_rm[df_na_rm$z_lm_k >= 3, ])[1])) + - theme_publication() - - rank_l_1sd_notext_lm <- ggplot(df_na_rm, aes(l_rank_lm, z_lm_l)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + - xlab("Rank") + - ylab("Int Z score L") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + - theme_publication() - - rank_l_2sd_notext_lm <- ggplot(df_na_rm, aes(l_rank_lm, z_lm_l)) + - ggtitle("Interaction Z score vs. Rank for L above 2sd") + - xlab("Rank") + - ylab("Int Z score L") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + - theme_publication() - - rank_l_3sd_notext_lm <- ggplot(df_na_rm, aes(l_rank_lm, z_lm_l)) + - ggtitle("Interaction Z score vs. Rank for L above 3sd") + - xlab("Rank") + - ylab("Int Z score L") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + - theme_publication() - - rank_k_1sd_notext_lm <- ggplot(df_na_rm, aes(k_rank_lm, z_lm_k)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + - xlab("Rank") + - ylab("Int Z score K") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + - theme_publication() - - rank_k_2sd_notext_lm <- ggplot(df_na_rm, aes(k_rank_lm, z_lm_k)) + - ggtitle("Interaction Z score vs. Rank for K above 2sd") + - xlab("Rank") + - ylab("Int Z score K") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + - theme_publication() - - rank_k_3sd_notext_lm <- ggplot(df_na_rm, aes(k_rank_lm, z_lm_k)) + - ggtitle("Interaction Z score vs. Rank for K above 3sd") + - xlab("Rank") + - ylab("Int Z score K") + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", - xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + - theme_publication() - - pdf(file.path(out_dir, "rank_plots_lm_narm.pdf"), width = 18, height = 12, onefile = TRUE) - - grid.arrange( - rank_l_1sd_lm, - rank_l_2sd_lm, - rank_l_3sd_lm, - rank_k_1sd_lm, - rank_k_2sd_lm, - rank_k_3sd_lm, - ncol = 3, nrow = 2 - ) - - grid.arrange( - rank_l_1sd_notext_lm, - rank_l_2sd_notext_lm, - rank_l_3sd_notext_lm, - rank_k_1sd_notext_lm, - rank_k_2sd_notext_lm, - rank_k_3sd_notext_lm, - ncol = 3, nrow = 2 - ) - - invisible(dev.off()) +# Function to load and preprocess data +load_and_preprocess_data <- function(input_file) { + df <- tryCatch({ + read.delim(input_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE) + }, error = function(e) { + stop("Error reading file: ", input_file, "\n", e$message) + }) + + df <- df[!df[[1]] %in% c("", "Scan"), ] + + numeric_columns <- c("Col", "Row", "l", "K", "r", "Scan", "AUC96", "LstBackgrd", "X1stBackgrd") + df[numeric_columns[numeric_columns %in% colnames(df)]] <- lapply(df[numeric_columns[numeric_columns %in% colnames(df)]], as.numeric) + + df$L <- if ("l" %in% colnames(df)) df$l else {warning("Missing column: l"); NA} + df$AUC <- if ("AUC96" %in% colnames(df)) df$AUC96 else {warning("Missing column: AUC96"); NA} + df$conc_num <- if ("Conc" %in% colnames(df)) as.numeric(gsub("[^0-9\\.]", "", df$Conc)) else NA + df$delta_bg <- if (all(c("X1stBackgrd", "LstBackgrd") %in% colnames(df))) df$LstBackgrd - df$X1stBackgrd else {warning("Missing columns for delta_bg calculation"); NA} + + if (nrow(df) == 0) warning("Dataframe is empty after filtering") + df } -# Get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 -get_lm_1 <- lm(df_na_rm$z_lm_k ~ df_na_rm$z_lm_l) -l_lm_1 <- summary(get_lm_1) -get_lm_2 <- lm(df_na_rm$z_lm_r ~ df_na_rm$z_lm_l) -l_lm_2 <- summary(get_lm_2) -get_lm_3 <- lm(df_na_rm$z_lm_auc ~ df_na_rm$z_lm_l) -l_lm_3 <- summary(get_lm_3) -get_lm_4 <- lm(df_na_rm$z_lm_r ~ df_na_rm$z_lm_k) -l_lm_4 <- summary(get_lm_4) -get_lm_5 <- lm(df_na_rm$z_lm_auc ~ df_na_rm$z_lm_k) -l_lm_5 <- summary(get_lm_5) -get_lm_6 <- lm(df_na_rm$z_lm_auc ~ df_na_rm$z_lm_r) -l_lm_6 <- summary(get_lm_6) +df <- load_and_preprocess_data(args$input_file) -pdf(file.path(out_dir, "correlation_cpps.pdf"), width = 10, height = 7, onefile = TRUE) +delta_bg_tolerance <- mean(df$delta_bg, na.rm = TRUE) + args$delta_bg_factor * sd(df$delta_bg, na.rm = TRUE) +message("Exp", args$exp_number, ": The delta_bg_factor is: ", args$delta_bg_factor) +message("Exp", args$exp_number, ": The delta_bg_tolerance is ", delta_bg_tolerance) -ggplot(df_na_rm, aes(z_lm_l, z_lm_k)) + - geom_point(shape = 3, color = "gray70") + - geom_smooth(method = "lm", color = "tomato3") + - ggtitle("Interaction L vs. Interaction K") + - xlab("z-score L") + - ylab("z-score K") + - annotate("text", - x = 0, - y = 0, - label = paste("R-squared = ", round(l_lm_1$r.squared, 3))) + - theme_publication_legend_right() + - theme( - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - axis.text.x = element_text(size = 16), - axis.title.x = element_text(size = 18), - axis.text.y = element_text(size = 16), - axis.title.y = element_text(size = 18) - ) +df <- df %>% + mutate(OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep)) %>% + filter(!Gene %in% c("BLANK", "Blank", "blank"), Drug != "BMH21") -ggplot(df_na_rm, aes(z_lm_l, z_lm_r)) + - geom_point(shape = 3, color = "gray70") + - geom_smooth(method = "lm", color = "tomato3") + - ggtitle("Interaction L vs. Interaction r") + - xlab("z-score L") + - ylab("z-score r") + - annotate("text", - x = 0, - y = 0, - label = paste("R-squared = ", round(l_lm_2$r.squared, 3))) + - theme_publication_legend_right() + - theme( - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - axis.text.x = element_text(size = 16), - axis.title.x = element_text(size = 18), - axis.text.y = element_text(size = 16), - axis.title.y = element_text(size = 18) - ) +# Function to create plot +create_plot <- function(df, var, plot_type) { + filtered_df <- df %>% filter(is.finite(.data[[var]])) + p <- ggplot(filtered_df, aes(Scan, .data[[var]], color = as.factor(conc_num))) + + ggtitle(paste("Plate analysis by Drug Conc for", var, "before quality control")) + + theme_publication() + + if (plot_type == "scatter") { + p <- p + + geom_point(shape = 3, size = 0.2, position = "jitter") + + stat_summary(fun = mean, geom = "point", size = 0.6) + + stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), geom = "errorbar") + } else { + p <- p + geom_boxplot() + } + + return(p) +} -ggplot(df_na_rm, aes(z_lm_l, z_lm_auc)) + -geom_point(shape = 3, color = "gray70") + -geom_smooth(method = "lm", color = "tomato3") + -ggtitle("Interaction L vs. Interaction AUC") + -xlab("z-score L") + -ylab("z-score AUC") + -annotate("text", - x = 0, - y = 0, - label = paste("R-squared = ", round(l_lm_3$r.squared, 3))) + -theme_publication_legend_right() + -theme( - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - axis.text.x = element_text(size = 16), - axis.title.x = element_text(size = 18), - axis.text.y = element_text(size = 16), - axis.title.y = element_text(size = 18) -) +# Function to publish plot to PDF and HTML (Plotly) +publish_plot <- function(plot, plot_path) { + # if (file.exists(plot_path)) { + # file.rename(plot_path, paste0(plot_path, BACKUP_SUFFIX)) + # } + + pdf(plot_path, width = PLOT_WIDTH, height = PLOT_HEIGHT) + print(plot) + dev.off() + + pgg <- suppressWarnings(ggplotly(plot, + tooltip = c("L", "K", "ORF", "Gene", "delta_bg")) %>% + layout(legend = list(orientation = "h"))) + saveWidget(pgg, sub(".pdf$", ".html", plot_path), selfcontained = TRUE) +} -ggplot(df_na_rm, aes(z_lm_k, z_lm_r)) + - geom_point(shape = 3, color = "gray70") + - geom_smooth(method = "lm", color = "tomato3") + - ggtitle("Interaction K vs. Interaction r") + - xlab("z-score K") + - ylab("z-score r") + - annotate("text", - x = 0, - y = 0, - label = paste("R-squared = ", round(l_lm_4$r.squared, 3))) + - theme_publication_legend_right() + - theme( - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - axis.text.x = element_text(size = 16), - axis.title.x = element_text(size = 18), - axis.text.y = element_text(size = 16), - axis.title.y = element_text(size = 18) - ) +# Function to publish multiple plots +publish_multiple_plots <- function(df, variables, plot_type, out_dir_qc, suffix = "") { + lapply(variables, function(var) { + plot <- create_plot(df, var, plot_type) + publish_plot(plot, file.path(out_dir_qc, paste0("plate_analysis_", var, suffix, ".pdf"))) + }) +} -ggplot(df_na_rm, aes(z_lm_k, z_lm_auc)) + - geom_point(shape = 3, color = "gray70") + - geom_smooth(method = "lm", color = "tomato3") + - ggtitle("Interaction K vs. Interaction AUC") + - xlab("z-score K") + - ylab("z-score AUC") + - annotate("text", - x = 0, - y = 0, - label = paste("R-squared = ", round(l_lm_5$r.squared, 3))) + - theme_publication_legend_right() + - theme( - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - axis.text.x = element_text(size = 16), - axis.title.x = element_text(size = 18), - axis.text.y = element_text(size = 16), - axis.title.y = element_text(size = 18) - ) +# Function to calculate and publish summary statistics +publish_summary_stats <- function(df, variables, out_dir) { + stats_list <- lapply(variables, function(var) { + df %>% + group_by(OrfRep, conc_num) %>% + summarize( + N = n(), + mean_val = mean(.data[[var]], na.rm = TRUE), + sd_val = sd(.data[[var]], na.rm = TRUE), + se_val = sd_val / sqrt(N) + ) + }) + summary_stats_df <- bind_rows(stats_list, .id = "variable") + write.csv(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE) +} -ggplot(df_na_rm, aes(z_lm_r, z_lm_auc)) + - geom_point(shape = 3, color = "gray70") + - geom_smooth(method = "lm", color = "tomato3") + - ggtitle("Interaction r vs. Interaction AUC") + - xlab("z-score r") + - ylab("z-score AUC") + - annotate("text", - x = 0, - y = 0, - label = paste("R-squared = ", round(l_lm_6$r.squared, 3))) + - theme_publication_legend_right() + - theme( - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - axis.text.x = element_text(size = 16), - axis.title.x = element_text(size = 18), - axis.text.y = element_text(size = 16), - axis.title.y = element_text(size = 18)) +# Function to calculate and publish interaction scores +publish_interaction_scores <- function(df, out_dir) { + interaction_scores <- df %>% + group_by(OrfRep) %>% + summarize( + mean_L = mean(L, na.rm = TRUE), + mean_K = mean(K, na.rm = TRUE), + mean_r = mean(r, na.rm = TRUE), + mean_AUC = mean(AUC, na.rm = TRUE), + delta_bg_mean = mean(delta_bg, na.rm = TRUE), + delta_bg_sd = sd(delta_bg, na.rm = TRUE) + ) %>% + mutate( + l_rank = rank(mean_L), + k_rank = rank(mean_K), + r_rank = rank(mean_r), + auc_rank = rank(mean_AUC) + ) + + write.csv(interaction_scores, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE) + write.csv(interaction_scores %>% + arrange(l_rank, k_rank), file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE) +} -interaction_scores_rf2 <- interaction_scores_rf[!is.na(interaction_scores_rf$z_lm_l), ] +# Function to publish z-scores +publish_zscores <- function(df, out_dir) { + zscores <- df %>% + mutate( + zscore_L = scale(L, center = TRUE, scale = TRUE), + zscore_K = scale(K, center = TRUE, scale = TRUE), + zscore_r = scale(r, center = TRUE, scale = TRUE), + zscore_AUC = scale(AUC, center = TRUE, scale = TRUE) + ) + + write.csv(zscores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE) +} -ggplot(df_na_rm, aes(z_lm_l, z_lm_k)) + - geom_point(shape = 3, color = "gray70") + - geom_point(data = interaction_scores_rf2, aes(z_lm_l, z_lm_k), color = "cyan") + - ggtitle("Interaction L vs. Interaction K") + - xlab("z-score L") + - ylab("z-score K") + - annotate("text", - x = 0, - y = 0, - label = paste("R-squared = ", round(l_lm_1$r.squared, 3))) + - theme_publication_legend_right() + - theme( - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - axis.text.x = element_text(size = 16), - axis.title.x = element_text(size = 18), - axis.text.y = element_text(size = 16), - axis.title.y = element_text(size = 18) - ) +# QC plot generation and publication +generate_and_publish_qc <- function(df, delta_bg_tolerance, out_dir_qc) { + publish_multiple_plots(df, c("L", "K", "r", "AUC", "delta_bg"), "scatter", out_dir_qc) + + delta_bg_above_tolerance <- df[df$delta_bg >= delta_bg_tolerance, ] + publish_multiple_plots(delta_bg_above_tolerance, c("L", "K", "r", "AUC", "delta_bg"), "scatter", out_dir_qc, "_above_tolerance") +} -ggplot(df_na_rm, aes(z_lm_l, z_lm_r)) + - geom_point(shape = 3, color = "gray70") + - geom_point(data = interaction_scores_rf2, aes(z_lm_l, z_lm_r), color = "cyan") + - ggtitle("Interaction L vs. Interaction r") + - xlab("z-score L") + - ylab("z-score r") + - annotate("text", - x = 0, - y = 0, - label = paste("R-squared = ", round(l_lm_2$r.squared, 3))) + - theme_publication_legend_right() + - theme( - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - axis.text.x = element_text(size = 16), - axis.title.x = element_text(size = 18), - axis.text.y = element_text(size = 16), - axis.title.y = element_text(size = 18) - ) +# Run QC plot generation and publication +generate_and_publish_qc(df, delta_bg_tolerance, args$out_dir_qc) -ggplot(df_na_rm, aes(z_lm_l, z_lm_auc)) + - geom_point(shape = 3, color = "gray70") + - geom_point(data = interaction_scores_rf2, aes(z_lm_l, z_lm_auc), color = "cyan") + - ggtitle("Interaction L vs. Interaction AUC") + - xlab("z-score L") + - ylab("z-score AUC") + - annotate("text", - x = 0, - y = 0, - label = paste("R-squared = ", round(l_lm_3$r.squared, 3))) + - theme_publication_legend_right() + - theme( - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - axis.text.x = element_text(size = 16), - axis.title.x = element_text(size = 18), - axis.text.y = element_text(size = 16), - axis.title.y = element_text(size = 18) - ) - -ggplot(df_na_rm, aes(z_lm_k, z_lm_r)) + - geom_point(shape = 3, color = "gray70") + - geom_point(data = interaction_scores_rf2, aes(z_lm_k, z_lm_r), color = "cyan") + - ggtitle("Interaction K vs. Interaction r") + - xlab("z-score K") + - ylab("z-score r") + - annotate("text", - x = 0, - y = 0, - label = paste("R-squared = ", round(l_lm_4$r.squared, 3))) + - theme_publication_legend_right() + - theme( - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - axis.text.x = element_text(size = 16), - axis.title.x = element_text(size = 18), - axis.text.y = element_text(size = 16), - axis.title.y = element_text(size = 18) - ) - -ggplot(df_na_rm, aes(z_lm_k, z_lm_auc)) + - geom_point(shape = 3, color = "gray70") + - geom_point(data = interaction_scores_rf2, aes(z_lm_k, z_lm_auc), color = "cyan") + - ggtitle("Interaction K vs. Interaction AUC") + - xlab("z-score K") + - ylab("z-score AUC") + - annotate("text", - x = 0, - y = 0, - label = paste("R-squared = ", round(l_lm_5$r.squared, 3))) + - theme_publication_legend_right() + - theme( - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - axis.text.x = element_text(size = 16), - axis.title.x = element_text(size = 18), - axis.text.y = element_text(size = 16), - axis.title.y = element_text(size = 18) - ) - -ggplot(df_na_rm, aes(z_lm_r, z_lm_auc)) + - geom_point(shape = 3, color = "gray70") + - geom_point(data = interaction_scores_rf2, aes(z_lm_r, z_lm_auc), color = "cyan") + - ggtitle("Interaction r vs. Interaction AUC") + - xlab("z-score r") + - ylab("z-score AUC") + - annotate("text", - x = 0, - y = 0, - label = paste("R-squared = ", round(l_lm_6$r.squared, 3))) + - theme_publication_legend_right() + - theme( - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - axis.text.x = element_text(size = 16), - axis.title.x = element_text(size = 18), - axis.text.y = element_text(size = 16), - axis.title.y = element_text(size = 18) - ) - -invisible(dev.off()) - -# write.csv(Labels, file.path("../Code/Parameters.csv"), row.names = FALSE) -# timestamp() +# Run summary statistics, interaction score, and z-score calculations and publications +variables <- c("L", "K", "r", "AUC", "delta_bg") +publish_summary_stats(df, variables, args$out_dir) +publish_interaction_scores(df, args$out_dir) +publish_zscores(df, args$out_dir) diff --git a/workflow/qhtcp-workflow b/workflow/qhtcp-workflow index ae579e7b..c20b3353 100755 --- a/workflow/qhtcp-workflow +++ b/workflow/qhtcp-workflow @@ -209,7 +209,12 @@ parse_input() { # @section Modules # @description # -# A module contains a cohesive set of actions/experiments to run on a project +# A module contains a cohesive set of tasks, including: +# +# * Filesystem operations +# * Variable setting +# * Executing other modules +# * Executing wrappers # # Use a module to: # @@ -217,8 +222,7 @@ parse_input() { # * Generate project directories # * Group multiple wrappers (and modules) into a larger task # * Dictate the ordering of multiple wrappers -# * Competently handle pushd and popd for their wrappers if they do not reside in the SCANS/PROJECT_DIR -# * Call their wrappers with the appropriate arguments +# * Call wrappers with the appropriate arguments # # @description module() { @@ -580,7 +584,6 @@ interactive_header() { unset response arr i done fi - echo "" # Project selection if [[ ${#PROJECTS[@]} -eq 0 ]]; then @@ -685,7 +688,7 @@ install_dependencies() { depends_r=( BiocManager ontologyIndex ggrepel tidyverse sos openxlsx ggplot2 plyr extrafont gridExtra gplots stringr plotly ggthemes pandoc - rmarkdown plotly htmlwidgets gplots gdata) + rmarkdown plotly htmlwidgets gplots gdata Hmisc) depends_bioc=(UCSC.utils org.Sc.sgd.db) [[ $1 == "--get-depends" ]] && return 0 # if we just want to read the depends vars @@ -693,6 +696,7 @@ install_dependencies() { # Install system-wide dependencies echo "Installing system dependencies" echo "You may be prompted for your sudo password to install packages using your system package manager" + echo "If you do not have sudo access, you may want to use toolbox" case "$(uname -s)" in Linux*|CYGWIN*|MINGW*) if hash dnf &>/dev/null; then @@ -1360,9 +1364,9 @@ qhtcp() { # Run R interactions script on all studies for study in "${STUDIES[@]}"; do read -r num sd _ <<< "$study" - r_interactions "$num" "$sd" & # run in parallel, catch with wait below - done \ - && wait -n \ + r_interactions "$num" "$sd" & + done + wait -n \ && remc \ && gtf \ && gta @@ -1790,7 +1794,7 @@ r_join_interactions() { "${1:-$QHTCP_RESULTS_DIR}/parameters.csv" ) - ((DEBUG)) && declare -p + # ((DEBUG)) && declare -p # for when the going gets tough backup "${out_files[@]}" @@ -2368,6 +2372,10 @@ main() { fi done + # If module equals install_dependencies run install_dependencies + declare -gx R_LIBS_USER=${R_LIBS_USER:-"$HOME/R/$SCRIPT_NAME"} + [[ ${MODULES[*]} == "install_dependencies" ]] && install_dependencies + # Loop over projects for PROJECT_NAME in "${PROJECTS[@]}"; do declare -gx PROJECT_NAME @@ -2383,7 +2391,6 @@ main() { declare -gx EASY_RESULTS_DIR="$EASY_OUT_DIR/$PROJECT_PREFIX" declare -gx GTA_OUT_DIR="$QHTCP_RESULTS_DIR/gta" declare -gx GTF_OUT_DIR="$QHTCP_RESULTS_DIR/gtf" - declare -gx R_LIBS_USER=${R_LIBS_USER:-"$HOME/R/$SCRIPT_NAME"} # ((DEBUG)) && declare -p # for when the going gets rough debug "Project: $PROJECT_NAME"