#!/usr/bin/env Rscript # NOTE: The script now has 6 optional arguments: # 1. Path to input easy results file # 2. /output/ directory # 3. Path to StudyInfo.csv # 4. Path to SGDgeneList # 5. Standard deviation value # 6. The experiment number (Exp# directory) library(ggplot2) library(plyr) library(extrafont) library(gridExtra) library(gplots) library(RColorBrewer) library(stringr) library(gdata) library(plotly) library(htmlwidgets) # Parse arguments args <- commandArgs(TRUE) inputFile <- file.path(args[1]) # Set output dir if (length(args) >= 2) { outDir <- file.path(args[2]) } else { outDir <- "/ZScores/" # for legacy workflow } # Set StudyInfo file path if (length(args) >= 3) { studyInfo <- file.path(args[3]) } else { studyInfo <- "../Code/StudyInfo.csv" # for legacy workflow } # Set SGDgeneList file path if (length(args) >= 4) { SGDgeneList <- file.path(args[4]) } else { SGDgeneList <- "../Code/SGD_features.tab" # for legacy workflow } # Set standard deviation if (length(args) >= 5) { delBGFactor <- args[5] } else { # User prompt for std multiplier Value print("Enter a Standard Deviation value for noise filter") print("Sean Santos recommends 3 or 5") delBGFactor <- readLines(file("stdin"), n = 1L) } delBGFactor <- as.numeric(delBGFactor) if (is.na(delBGFactor)) { delBGFactor <- 3 # recommended by Sean } print(paste("The Standard Deviation Value is:", delBGFactor)) # Set experiment # if (length(args) >= 6) { expNumber <- args[6] } else { # User prompt for std multiplier Value print("Enter the experiment number (Exp# directory)") expNumber <- readLines(file("stdin"), n = 1L) } expNumber <- as.numeric(expNumber) outDir_QC <- file.path(outDir, "QC") if (!file.exists(outDir)) { dir.create(outDir) } if (!file.exists(outDir_QC)) { dir.create(file.path(outDir_QC)) } options(width = 1000) ls.str() # Write delBJFactor to the StudyInfo file # TODO we probably shouldn't be doing this, need one source of truth # TODO disabling this for now # Labels <- read.csv(file = studyInfo, stringsAsFactors = FALSE) # sep = "," # Labels[expNumber, 3] <- delBGFactor # write.csv(Labels, file = studyInfo, row.names = FALSE) # Begin User Data Selection Section # Read in the data X <- read.delim(inputFile, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE) X <- X[!(X[[1]] %in% c("", "Scan")), ] # X <- X[!(X[[1]]%in%c(61:76)), ] #Remove dAmp plates which are Scans 61 thru 76 # X <- X[which(X$Specifics == "WT"), ] # X_length <- length(X[1, ]) # X_end <- length(X[1, ]) - 2 # X <- X[, c(1:42, X_end:X_length)] # Use numeric data to perform operations X$Col <- as.numeric(X$Col) X$Row <- as.numeric(X$Row) X$l <- as.numeric(X$l) X$K <- as.numeric(X$K) X$r <- as.numeric(X$r) X$Scan <- as.numeric(X$Scan) X$AUC <- as.numeric(X$AUC) X$LstBackgrd <- as.numeric(X$LstBackgrd) X$X1stBackgrd <- as.numeric(X$X1stBackgrd) # Sometimes the non-varying drug is in the 'Drug' col vs the 'Modifier1' col # as was the case in Gemcitabin and Cytarabin experiments. # The following allows user to rename columns so as to get the appropriate # data where it needs to be for the script to run properly. # colnames(X)[7] <- "Modifier1" # colnames(X)[8] <- "Conc1" # colnames(X)[10] <- "Drug" # colnames(X)[11] <- "Conc" # Set the OrfRep to YDL227C for the ref data X[X$ORF == "YDL227C", ]$OrfRep <- "YDL227C" # Sean removes the Doxycyclin at 0.0ug.mL so that only the Oligomycin series with Doxycyclin of 0.12ug/mL are used. # That is the first DM plates are removed from the data set with the following. # This removes data with dox == 0 leaving gene expression on with four different concentrations of Gemcytabin # X <- X[X$Conc1 != "0ug/ml", ] X <- X[X$Drug != "BMH21", ] # this removes data concerning BMH21 for this experiment # Mert placed the "bad_spot" text in the ORF col. for particular spots in the RF1 and RF2 plates. # This code removes those spots from the data set used for the interaction analysis. # Dr.Hartman feels that these donot effect Zscores significantly and so "non-curated" files were used. # try(X <- X[X$ORF != "bad_spot", ]) # Get total number of drug concentrations total_conc_nums <- length(unique(X$Conc)) # Function to ID numbers in string with characters+numbers (ie to get numeric drug conc) numextract <- function(string) { str_extract(string, "\\-*\\d+\\.*\\d*") } # Generate a new column with the numeric drug concs X$Conc_Num <- as.numeric(numextract(X$Conc)) # Generate new column with the numeric drug concs as factors starting at 0 for the graphing later X$Conc_Num_Factor <- as.numeric(as.factor(X$Conc_Num)) - 1 # Get the max factor for concentration MAX_CONC <- max(X$Conc_Num_Factor) # If treating numbers not as factors uncomment next line and comment out previous line # MAX_CONC <- max(X$Conc_Num) # Remove wells with problems for making graphs and to not include in summary statistics X <- X[X$Gene != "BLANK", ] X <- X[X$Gene != "Blank", ] X <- X[X$ORF != "Blank", ] X <- X[X$Gene != "blank", ] # X <- X[X$Gene != "HO", ] Xbu <- X # Use SGDgenelist to update orfs and replace empty geneName cells with ORF name (adapted from Sean's Merge script). # This is to 'fix' the naming for everything that follows (REMc, Heatmaps ... et.al) rather than do it piece meal later # Sean's Match Script( which was adapted here) was fixed 2022_0608 so as not to overwrite the RF1&RF2 geneNames # in the Z_lm_L, K, r&AUC output values. Values correlated well but were off by a multiplier factor. genes <- data.frame(read.delim( file = SGDgeneList, quote = "", header = FALSE, colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11)))) for (i in 1:length(X[, 14])) { ii <- as.numeric(i) line_num <- match(X[ii, 14], genes[, 1], nomatch = 1) OrfRepColNum <- as.numeric(match("OrfRep", names(X))) if (X[ii, OrfRepColNum] != "YDL227C") { X[ii, 15] <- genes[line_num, 2] } if ((X[ii, 15] == "") || (X[ii, 15] == "OCT1")) { X[ii, 15] <- X[ii, OrfRepColNum] } } Xblankreplace <- X # X = Xbu # for restore testing restore X if geneName 'Match' routine needs changing # Remove dAmPs # jlh confirmed to leave dAmps in so comment out this section # DAmPs_List <- "../Code/22_0602_Remy_DAmPsList.txt" # Damps <- read.delim(DAmPs_List, header = F) # X <- X[!(X$ORF %in% Damps$V1), ] # fix this to Damps[, 1] # XafterDampsRM = X # backup for debugging # Begin Graphics Boiler Plate Section # theme elements for plots theme_publication <- function(base_size = 14, base_family = "sans") { library(grid) library(ggthemes) (theme_foundation(base_size = base_size, base_family = base_family) + theme(plot.title = element_text( face = "bold", size = rel(1.2), hjust = 0.5), text = element_text(), panel.background = element_rect(colour = NA), plot.background = element_rect(colour = NA), panel.border = element_rect(colour = NA), axis.title = element_text(face = "bold", size = rel(1)), axis.title.y = element_text(angle = 90, vjust = 2), axis.title.x = element_text(vjust = -0.2), axis.text = element_text(), axis.line = element_line(colour = "black"), axis.ticks = element_line(), panel.grid.major = element_line(colour = "#f0f0f0"), panel.grid.minor = element_blank(), legend.key = element_rect(colour = NA), legend.position = "bottom", legend.direction = "horizontal", legend.key.size = unit(0.2, "cm"), legend.spacing = unit(0, "cm"), legend.title = element_text(face = "italic"), plot.margin = unit(c(10, 5, 5, 5), "mm"), strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"), strip.text = element_text(face = "bold") ) ) } scale_fill_publication <- function(...) { library(scales) discrete_scale("fill", "Publication", manual_pal( values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...) } scale_colour_Publication <- function(...) { discrete_scale("colour", "Publication", manual_pal( values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...) } theme_publication_legend_right <- function(base_size = 14, base_family = "sans") { (theme_foundation(base_size = base_size, base_family = base_family) + theme(plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5), text = element_text(), panel.background = element_rect(colour = NA), plot.background = element_rect(colour = NA), panel.border = element_rect(colour = NA), axis.title = element_text(face = "bold", size = rel(1)), axis.title.y = element_text(angle = 90, vjust = 2), axis.title.x = element_text(vjust = -0.2), axis.text = element_text(), axis.line = element_line(colour = "black"), axis.ticks = element_line(), panel.grid.major = element_line(colour = "#f0f0f0"), panel.grid.minor = element_blank(), legend.key = element_rect(colour = NA), legend.position = "right", legend.direction = "vertical", legend.key.size = unit(0.5, "cm"), legend.spacing = unit(0, "cm"), legend.title = element_text(face = "italic"), plot.margin = unit(c(10, 5, 5, 5), "mm"), strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"), strip.text = element_text(face = "bold") ) ) } scale_fill_publication <- function(...) { discrete_scale("fill", "Publication", manual_pal( values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...) } scale_colour_Publication <- function(...) { discrete_scale("colour", "Publication", manual_pal( values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...) } # Print timestamp for initial time the code starts timestamp() # Begin QC Section # Part 2 - Quality control # Print quality control graphs for each dataset before removing contaminated data # and before adjusting missing data to max theoretical values # Plate analysis plot # Plate analysis is a quality check to identify plate effects containing anomalies Plate_Analysis_L <- ggplot(X, aes(Scan, l, color = as.factor(Conc_Num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary(fun = mean, geom = "point", size = 0.6) + ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_publication() Plate_Analysis_K <- ggplot(X, aes(Scan, K, color = as.factor(Conc_Num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary(fun = mean, geom = "point", size = 0.6) + ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_publication() Plate_Analysis_r <- ggplot(X, aes(Scan, r, color = as.factor(Conc_Num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary(fun = mean, geom = "point", size = 0.6) + ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_publication() Plate_Analysis_AUC <- ggplot(X, aes(Scan, AUC, color = as.factor(Conc_Num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary(fun = mean, geom = "point", size = 0.6) + ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_publication() Plate_Analysis_L_Box <- ggplot(X, aes(as.factor(Scan), l, color = as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_publication() Plate_Analysis_K_Box <- ggplot(X, aes(as.factor(Scan), K, color = as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_publication() Plate_Analysis_r_Box <- ggplot(X, aes(as.factor(Scan), r, color = as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_publication() Plate_Analysis_AUC_Box <- ggplot(X, aes(as.factor(Scan), AUC, color = as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_publication() # Quality control - values with a high delta background likely have heavy contamination # Check the frequency of these values # Report the L and K values of these spots # Report the number to be removed based on the Delta_Background_Tolerance X$Delta_Backgrd <- X$LstBackgrd - X$X1stBackgrd # Raw l vs K before QC Raw_l_vs_K_beforeQC <- ggplot(X, aes(l, K, color = as.factor(Conc_Num))) + geom_point(aes(ORF = ORF, Gene = Gene, Delta_Backgrd = Delta_Backgrd), shape = 3) + ggtitle("Raw L vs K before QC") + theme_publication_legend_right() pdf(file.path(outDir_QC, "Raw_L_vs_K_beforeQC.pdf"), width = 12, height = 8) Raw_l_vs_K_beforeQC dev.off() pgg <- ggplotly(Raw_l_vs_K_beforeQC) plotly_path <- file.path(outDir_QC, "Raw_L_vs_K_beforeQC.html") saveWidget(pgg, file = plotly_path, selfcontained = TRUE) # Set delta background tolerance based on 3 sds from the mean delta background Delta_Background_Tolerance <- mean(X$Delta_Backgrd) + (delBGFactor * sd(X$Delta_Backgrd)) # Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(3*sd(X$Delta_Backgrd)) sprintf("Delta_Background_Tolerance is %f", Delta_Background_Tolerance) Plate_Analysis_Delta_Backgrd <- ggplot(X, aes(Scan, Delta_Backgrd, color = as.factor(Conc_Num))) + geom_point(shape = 3, size = 0.2, position = "jitter") + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary(fun = mean, geom = "point", size = 0.6) + ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_publication() Plate_Analysis_Delta_Backgrd_Box <- ggplot(X, aes(as.factor(Scan), Delta_Backgrd, color = as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_publication() X_Delta_Backgrd_above_Tolerance <- X[X$Delta_Backgrd >= Delta_Background_Tolerance, ] X_Delta_Backgrd_above_Tolerance_K_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$K, na.rm = TRUE)) / 2 X_Delta_Backgrd_above_Tolerance_L_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$l, na.rm = TRUE)) / 2 X_Delta_Backgrd_above_Tolerance_toRemove <- dim(X_Delta_Backgrd_above_Tolerance)[1] X_Delta_Backgrd_above_Tolerance_L_vs_K <- ggplot(X_Delta_Backgrd_above_Tolerance, aes(l, K, color = as.factor(Conc_Num))) + geom_point(aes(ORF = ORF, Gene = Gene, Delta_Backgrd = Delta_Backgrd), shape = 3) + ggtitle(paste("Raw L vs K for strains above delta background threshold of", Delta_Background_Tolerance, "or above")) + annotate("text", x = X_Delta_Backgrd_above_Tolerance_L_halfmedian, y = X_Delta_Backgrd_above_Tolerance_K_halfmedian, label = paste("Strains above delta background tolerance = ", X_Delta_Backgrd_above_Tolerance_toRemove) ) + theme_publication_legend_right() pdf(file.path(outDir_QC, "Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.pdf"), width = 12, height = 8) X_Delta_Backgrd_above_Tolerance_L_vs_K dev.off() pgg <- ggplotly(X_Delta_Backgrd_above_Tolerance_L_vs_K) plotly_path <- file.path(outDir_QC, "Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.html") saveWidget(pgg, file = plotly_path, selfcontained = TRUE) # Frequency plot for all data vs. the delta_background DeltaBackground_Frequency_Plot <- ggplot(X, aes(Delta_Backgrd, color = as.factor(Conc_Num))) + geom_density() + ggtitle("Density plot for Delta Background by Conc All Data") + theme_publication_legend_right() # Bar plot for all data vs. the delta_background DeltaBackground_Bar_Plot <- ggplot(X, aes(Delta_Backgrd, color = as.factor(Conc_Num))) + geom_bar() + ggtitle("Bar plot for Delta Background by Conc All Data") + theme_publication_legend_right() pdf(file.path(outDir_QC, "Frequency_Delta_Background.pdf"), width = 12, height = 8) print(DeltaBackground_Frequency_Plot) print(DeltaBackground_Bar_Plot) dev.off() # Need to identify missing data, and differentiate between this data and removed data # so the removed data can get set to NA and the missing data can get set to max theoretical values # 1 for missing data, 0 for non missing data # Use "NG" for NoGrowth rather than "missing" X$NG <- 0 try(X[X$l == 0 & !is.na(X$l), ]$NG <- 1) # 1 for removed data, 0 non removed data # Use DB to identify number of genes removed due to the DeltaBackground Threshold rather than "Removed" X$DB <- 0 try(X[X$Delta_Backgrd >= Delta_Background_Tolerance, ]$DB <- 1) # Replace the CPPs for l, r, AUC and K (must be last!) for removed data try(X[X$Delta_Backgrd >= Delta_Background_Tolerance, ]$l <- NA) try(X[X$Delta_Backgrd >= Delta_Background_Tolerance, ]$r <- NA) try(X[X$Delta_Backgrd >= Delta_Background_Tolerance, ]$AUC <- NA) try(X[X$Delta_Backgrd >= Delta_Background_Tolerance, ]$K <- NA) # QC Plots Plate_Analysis_L_afterQC <- ggplot(X, aes(Scan, l, color = as.factor(Conc_Num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary( fun = mean, geom = "point", size = 0.6 ) + ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_publication() Plate_Analysis_K_afterQC <- ggplot(X, aes(Scan, K, color = as.factor(Conc_Num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary( fun = mean, geom = "point", size = 0.6 ) + ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_publication() Plate_Analysis_r_afterQC <- ggplot(X, aes(Scan, r, color = as.factor(Conc_Num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary( fun = mean, geom = "point", size = 0.6 ) + ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_publication() Plate_Analysis_AUC_afterQC <- ggplot(X, aes(Scan, AUC, color = as.factor(Conc_Num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary( fun = mean, geom = "point", size = 0.6 ) + ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_publication() Plate_Analysis_Delta_Backgrd_afterQC <- ggplot(X, aes(Scan, Delta_Backgrd, color = as.factor(Conc_Num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary( fun = mean, geom = "point", size = 0.6 ) + ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_publication() Plate_Analysis_L_Box_afterQC <- ggplot(X, aes(as.factor(Scan), l, color = as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_publication() Plate_Analysis_K_Box_afterQC <- ggplot(X, aes(as.factor(Scan), K, color = as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_publication() Plate_Analysis_r_Box_afterQC <- ggplot(X, aes(as.factor(Scan), r, color = as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_publication() Plate_Analysis_AUC_Box_afterQC <- ggplot(X, aes(as.factor(Scan), AUC, color = as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_publication() Plate_Analysis_Delta_Backgrd_Box_afterQC <- ggplot(X, aes(as.factor(Scan), Delta_Backgrd, color = as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_publication() # Print the plate analysis data before and after QC pdf(file.path(outDir_QC, "Plate_Analysis.pdf"), width = 14, height = 9) Plate_Analysis_L Plate_Analysis_L_afterQC Plate_Analysis_K Plate_Analysis_K_afterQC Plate_Analysis_r Plate_Analysis_r_afterQC Plate_Analysis_AUC Plate_Analysis_AUC_afterQC Plate_Analysis_Delta_Backgrd Plate_Analysis_Delta_Backgrd_afterQC dev.off() # Print the plate analysis data before and after QC pdf(file.path(outDir_QC, "Plate_Analysis_Boxplots.pdf"), width = 18, height = 9) Plate_Analysis_L_Box Plate_Analysis_L_Box_afterQC Plate_Analysis_K_Box Plate_Analysis_K_Box_afterQC Plate_Analysis_r_Box Plate_Analysis_r_Box_afterQC Plate_Analysis_AUC_Box Plate_Analysis_AUC_Box_afterQC Plate_Analysis_Delta_Backgrd_Box Plate_Analysis_Delta_Backgrd_Box_afterQC dev.off() # Remove the zero values and print plate analysis X_noZero <- X[which(X$l > 0), ] Plate_Analysis_L_afterQC_Z <- ggplot(X_noZero, aes(Scan, l, color = as.factor(Conc_Num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary( fun = mean, geom = "point", size = 0.6 ) + ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_publication() Plate_Analysis_K_afterQC_Z <- ggplot(X_noZero, aes(Scan, K, color = as.factor(Conc_Num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary( fun = mean, geom = "point", size = 0.6 ) + ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_publication() Plate_Analysis_r_afterQC_Z <- ggplot(X_noZero, aes(Scan, r, color = as.factor(Conc_Num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary( fun = mean, geom = "point", size = 0.6 ) + ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_publication() Plate_Analysis_AUC_afterQC_Z <- ggplot(X_noZero, aes(Scan, AUC, color = as.factor(Conc_Num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary( fun = mean, geom = "point", size = 0.6 ) + ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_publication() Plate_Analysis_Delta_Backgrd_afterQC_Z <- ggplot(X_noZero, aes(Scan, Delta_Backgrd, color = as.factor(Conc_Num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" ) + stat_summary( fun = mean, geom = "point", size = 0.6 ) + ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_publication() Plate_Analysis_L_Box_afterQC_Z <- ggplot(X_noZero, aes(as.factor(Scan), l, color = as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_publication() Plate_Analysis_K_Box_afterQC_Z <- ggplot(X_noZero, aes(as.factor(Scan), K, color = as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_publication() Plate_Analysis_r_Box_afterQC_Z <- ggplot(X_noZero, aes(as.factor(Scan), r, color = as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_publication() Plate_Analysis_AUC_Box_afterQC_Z <- ggplot(X_noZero, aes(as.factor(Scan), AUC, color = as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_publication() Plate_Analysis_Delta_Backgrd_Box_afterQC_Z <- ggplot(X_noZero, aes(as.factor(Scan), Delta_Backgrd, color = as.factor(Conc_Num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_publication() # Print the plate analysis data before and after QC pdf(file.path(outDir_QC, "Plate_Analysis_noZeros.pdf"), width = 14, height = 9) Plate_Analysis_L_afterQC_Z Plate_Analysis_K_afterQC_Z Plate_Analysis_r_afterQC_Z Plate_Analysis_AUC_afterQC_Z Plate_Analysis_Delta_Backgrd_afterQC_Z dev.off() # Print the plate analysis data before and after QC pdf(file.path(outDir_QC, "Plate_Analysis_noZeros_Boxplots.pdf"), width = 18, height = 9) Plate_Analysis_L_Box_afterQC_Z Plate_Analysis_K_Box_afterQC_Z Plate_Analysis_r_Box_afterQC_Z Plate_Analysis_AUC_Box_afterQC_Z Plate_Analysis_Delta_Backgrd_Box_afterQC_Z dev.off() # Remove dataset with zeros removed rm(X_noZero) # X_test_missing_and_removed <- X[X$Removed == 1, ] # Calculate summary statistics for all strains, including both background and the deletions X_stats_ALL <- ddply( X, c("Conc_Num", "Conc_Num_Factor"), summarise, N = (length(l)), mean_L = mean(l, na.rm = TRUE), median_L = median(l, na.rm = TRUE), max_L = max(l, na.rm = TRUE), min_L = min(l, na.rm = TRUE), sd_L = sd(l, na.rm = TRUE), se_L = sd_L / sqrt(N - 1), mean_K = mean(K, na.rm = TRUE), median_K = median(K, na.rm = TRUE), max_K = max(K, na.rm = TRUE), min_K = min(K, na.rm = TRUE), sd_K = sd(K, na.rm = TRUE), se_K = sd_K / sqrt(N - 1), mean_r = mean(r, na.rm = TRUE), median_r = median(r, na.rm = TRUE), max_r = max(r, na.rm = TRUE), min_r = min(r, na.rm = TRUE), sd_r = sd(r, na.rm = TRUE), se_r = sd_r / sqrt(N - 1), mean_AUC = mean(AUC, na.rm = TRUE), median_AUC = median(AUC, na.rm = TRUE), max_AUC = max(AUC, na.rm = TRUE), min_AUC = min(AUC, na.rm = TRUE), sd_AUC = sd(AUC, na.rm = TRUE), se_AUC = sd_AUC / sqrt(N - 1) ) # print(X_stats_ALL_L) write.csv(X_stats_ALL, file.path(outDir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE) # Part 3 - Generate summary statistics and calculate the max theoretical L value # Calculate the Z score at each drug conc for each deletion strain # Get the background strains - can be modified to take another argument but for most screens will just be YDL227C Background_Strains <- c("YDL227C") # First part of loop will go through for each background strain # In most cases there will only be one YDL227C for (s in Background_Strains) { X_Background <- X[X$OrfRep == s, ] # If there's missing data for the background strains set these values to NA so the 0 values aren't included in summary statistics # we may want to consider in some cases giving the max high value to L depending on the data type if (table(X_Background$l)[1] == 0) { X_Background[X_Background$l == 0, ]$l <- NA X_Background[X_Background$K == 0, ]$K <- NA X_Background[X_Background$r == 0, ]$r <- NA X_Background[X_Background$AUC == 0, ]$AUC <- NA } X_Background <- X_Background[!is.na(X_Background$l), ] # Get summary stats for L, K, R, AUC X_stats_BY_L <- ddply( X_Background, c("OrfRep", "Conc_Num", "Conc_Num_Factor"), summarise, N = (length(l)), mean = mean(l, na.rm = TRUE), median = median(l, na.rm = TRUE), max = max(l, na.rm = TRUE), min = min(l, na.rm = TRUE), sd = sd(l, na.rm = TRUE), se = sd / sqrt(N - 1) ) print(X_stats_BY_L) X1_SD <- max(X_stats_BY_L$sd) X_stats_BY_K <- ddply( X_Background, c("OrfRep", "Conc_Num", "Conc_Num_Factor"), summarise, N = (length(K)), mean = mean(K, na.rm = TRUE), median = median(K, na.rm = TRUE), max = max(K, na.rm = TRUE), min = min(K, na.rm = TRUE), sd = sd(K, na.rm = TRUE), se = sd / sqrt(N - 1) ) X1_SD_K <- max(X_stats_BY_K$sd) X_stats_BY_r <- ddply( X_Background, c("OrfRep", "Conc_Num", "Conc_Num_Factor"), summarise, N = length(r), mean = mean(r, na.rm = TRUE), median = median(r, na.rm = TRUE), max = max(r, na.rm = TRUE), min = min(r, na.rm = TRUE), sd = sd(r, na.rm = TRUE), se = sd / sqrt(N - 1) ) X1_SD_r <- max(X_stats_BY_r$sd) X_stats_BY_AUC <- ddply( X_Background, c("OrfRep", "Conc_Num", "Conc_Num_Factor"), summarise, N = length(AUC), mean = mean(AUC, na.rm = TRUE), median = median(AUC, na.rm = TRUE), max = max(AUC, na.rm = TRUE), min = min(AUC, na.rm = TRUE), sd = sd(AUC, na.rm = TRUE), se = sd / sqrt(N - 1) ) X1_SD_AUC <- max(X_stats_BY_AUC$sd) X_stats_BY <- ddply( X_Background, c("OrfRep", "Conc_Num", "Conc_Num_Factor"), summarise, N = (length(l)), mean_L = mean(l, na.rm = TRUE), median_L = median(l, na.rm = TRUE), max_L = max(l, na.rm = TRUE), min_L = min(l, na.rm = TRUE), sd_L = sd(l, na.rm = TRUE), se_L = sd_L / sqrt(N - 1), mean_K = mean(K, na.rm = TRUE), median_K = median(K, na.rm = TRUE), max_K = max(K, na.rm = TRUE), min_K = min(K, na.rm = TRUE), sd_K = sd(K, na.rm = TRUE), se_K = sd_K / sqrt(N - 1), mean_r = mean(r, na.rm = TRUE), median_r = median(r, na.rm = TRUE), max_r = max(r, na.rm = TRUE), min_r = min(r, na.rm = TRUE), sd_r = sd(r, na.rm = TRUE), se_r = sd_r / sqrt(N - 1), mean_AUC = mean(AUC, na.rm = TRUE), median_AUC = median(AUC, na.rm = TRUE), max_AUC = max(AUC, na.rm = TRUE), min_L = min(AUC, na.rm = TRUE), sd_AUC = sd(AUC, na.rm = TRUE), se_AUC = sd_AUC / sqrt(N - 1) ) write.csv(X_stats_BY, file.path(outDir, "SummaryStats_BackgroundStrains.csv"), row.names = FALSE) # Calculate the max theoretical L values # Only look for max values when K is within 2SD of the ref strain for (q in unique(X$Conc_Num_Factor)) { if (q == 0) { X_within_2SD_K <- X[X$Conc_Num_Factor == q, ] X_within_2SD_K <- X_within_2SD_K[!is.na(X_within_2SD_K$l), ] X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q, ] X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K >= (X_stats_TEMP_K$mean[1] - (2 * X_stats_TEMP_K$sd[1])), ] X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K <= (X_stats_TEMP_K$mean[1] + (2 * X_stats_TEMP_K$sd[1])), ] X_outside_2SD_K <- X[X$Conc_Num_Factor == q, ] X_outside_2SD_K <- X_outside_2SD_K[!is.na(X_outside_2SD_K$l), ] # X_outside_2SD_K_Temp <- # X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q, ] X_outside_2SD_K <- X_outside_2SD_K[ X_outside_2SD_K$K <= (X_stats_TEMP_K$mean[1] - (2 * X_stats_TEMP_K$sd[1])) | X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2 * X_stats_TEMP_K$sd[1])), ] # X_outside_2SD_K <- # X_outside_2SD_K[X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])), ] } if (q > 0) { X_within_2SD_K_temp <- X[X$Conc_Num_Factor == q, ] X_within_2SD_K_temp <- X_within_2SD_K_temp[!is.na(X_within_2SD_K_temp$l), ] X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q, ] X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] - (2 * X_stats_TEMP_K$sd[1])), ] X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] + (2 * X_stats_TEMP_K$sd[1])), ] X_within_2SD_K <- rbind(X_within_2SD_K, X_within_2SD_K_temp) X_outside_2SD_K_temp <- X[X$Conc_Num_Factor == q, ] X_outside_2SD_K_temp <- X_outside_2SD_K_temp[!is.na(X_outside_2SD_K_temp$l), ] # X_outside_2SD_K_Temp <- # X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q, ] X_outside_2SD_K_temp <- X_outside_2SD_K_temp[ X_outside_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] - (2 * X_stats_TEMP_K$sd[1])) | X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2 * X_stats_TEMP_K$sd[1])), ] # X_outside_2SD_K_temp <- # X_outside_2SD_K_temp[X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) , ] X_outside_2SD_K <- rbind(X_outside_2SD_K, X_outside_2SD_K_temp) } } X_stats_BY_L_within_2SD_K <- ddply( X_within_2SD_K, c("Conc_Num", "Conc_Num_Factor"), summarise, N = (length(l)), mean = mean(l), median = median(l), max = max(l, na.rm = TRUE), min = min(l, na.rm = TRUE), sd = sd(l), se = sd / sqrt(N - 1), z_max = (max - mean) / sd ) print(X_stats_BY_L_within_2SD_K) X1_SD_within_2SD_K <- max(X_stats_BY_L_within_2SD_K$sd) write.csv( X_stats_BY_L_within_2SD_K, file.path(outDir_QC, "Max_Observed_L_Vals_for_spots_within_2SD_K.csv"), row.names = FALSE ) X_stats_BY_L_outside_2SD_K <- ddply( X_outside_2SD_K, c("Conc_Num", "Conc_Num_Factor"), summarise, N = (length(l)), mean = mean(l), median = median(l), max = max(l, na.rm = TRUE), min = min(l, na.rm = TRUE), sd = sd(l), se = sd / sqrt(N - 1) ) print(X_stats_BY_L_outside_2SD_K) X1_SD_outside_2SD_K <- max(X_stats_BY_L_outside_2SD_K$sd) # X1_SD_outside_2SD_K <- X[X$l %in% X1_SD_within_2SD_K$l, ] Outside_2SD_K_L_vs_K <- ggplot(X_outside_2SD_K, aes(l, K, color = as.factor(Conc_Num))) + geom_point(aes(ORF = ORF, Gene = Gene, Delta_Backgrd = Delta_Backgrd), shape = 3) + ggtitle("Raw L vs K for strains falling outside 2SD of the K mean at each conc") + theme_publication_legend_right() pdf(file.path(outDir_QC, "Raw_L_vs_K_for_strains_2SD_outside_mean_K.pdf"), width = 10, height = 8) print(Outside_2SD_K_L_vs_K) dev.off() pgg <- ggplotly(Outside_2SD_K_L_vs_K) plotly_path <- file.path(outDir_QC, "RawL_vs_K_for_strains_outside_2SD_K.html") saveWidget(pgg, file = plotly_path, selfcontained = TRUE) Outside_2SD_K_delta_background_vs_K <- ggplot(X_outside_2SD_K, aes(Delta_Backgrd, K, color = as.factor(Conc_Num))) + geom_point(aes(l = l, ORF = ORF, Gene = Gene), shape = 3, position = "jitter") + ggtitle("DeltaBackground vs K for strains falling outside 2SD of the K mean at each conc") + theme_publication_legend_right() pdf(file.path(outDir_QC, "DeltaBackground_vs_K_for_strains_2SD_outside_mean_K.pdf"), width = 10, height = 8) Outside_2SD_K_delta_background_vs_K dev.off() pgg <- ggplotly(Outside_2SD_K_delta_background_vs_K) # pgg plotly_path <- file.path(outDir_QC, "DeltaBackground_vs_K_for_strains_outside_2SD_K.html") saveWidget(pgg, file = plotly_path, selfcontained = TRUE) # Get the background strain mean values at the no drug conc to calculate shift Background_L <- X_stats_BY_L$mean[1] Background_K <- X_stats_BY_K$mean[1] Background_r <- X_stats_BY_r$mean[1] Background_AUC <- X_stats_BY_AUC$mean[1] # Create empty plots for plotting element p_l <- ggplot() p_K <- ggplot() p_r <- ggplot() p_AUC <- ggplot() p_rf_l <- ggplot() p_rf_K <- ggplot() p_rf_r <- ggplot() p_rf_AUC <- ggplot() # Get only the deletion strains X2 <- X X2 <- X2[X2$OrfRep != "YDL227C", ] # If set to max theoretical value, add a 1 to SM, if not, leave as 0 # SM = Set to Max X2$SM <- 0 # Set the missing values to the highest theoretical value at each drug conc for L # Leave other values as 0 for the max/min for (i in 1:length(unique(X2$Conc_Num))) { Concentration <- unique(X2$Conc_Num)[i] X2_temp <- X2[X2$Conc_Num == Concentration, ] if (Concentration == 0) { X2_new <- X2_temp sprintf("Check loop order, conc = %f", Concentration) } if (Concentration > 0) { try(X2_temp[X2_temp$l == 0 & !is.na(X2_temp$l), ]$l <- X_stats_BY_L_within_2SD_K$max[i]) try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l), ]$SM <- 1) try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l), ]$l <- X_stats_BY_L_within_2SD_K$max[i]) # X2_temp[X2_temp$K == 0, ]$K <- X_stats_ALL_K$max[i] # X2_temp[X2_temp$r == 0, ]$r <- X_stats_ALL_r$max[i] # X2_temp[X2_temp$AUC == 0, ]$AUC <- X_stats_ALL_AUC$max[i] sprintf("Check loop order, conc = %f", Concentration) X2_new <- rbind(X2_new, X2_temp) } } X2 <- X2_new # Get only the RF strains X2_RF <- X X2_RF <- X2_RF[X2_RF$OrfRep == "YDL227C", ] # If set to max theoretical value, add a 1 to SM, if not, leave as 0 # SM = Set to Max X2_RF$SM <- 0 # Set the missing values to the highest theoretical value at each drug conc for L # Leave other values as 0 for the max/min for (i in 1:length(unique(X2_RF$Conc_Num))) { Concentration <- unique(X2_RF$Conc_Num)[i] X2_RF_temp <- X2_RF[X2_RF$Conc_Num == Concentration, ] if (Concentration == 0) { X2_RF_new <- X2_RF_temp sprintf("Check loop order, conc = %f", Concentration) } if (Concentration > 0) { try(X2_RF_temp[X2_RF_temp$l == 0 & !is.na(X2_RF_temp$l), ]$l <- X_stats_BY_L_within_2SD_K$max[i]) try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l), ]$SM <- 1) try(X2_RF_temp[X2_RF_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_RF_temp$l), ]$l <- X_stats_BY_L_within_2SD_K$max[i]) sprintf("If error, refs have no L values outside theoretical max L, for REFs, conc = %f", Concentration) X2_RF_new <- rbind(X2_RF_new, X2_RF_temp) } } X2_RF <- X2_RF_new # Part 4 Get the RF Z score values # Change the OrfRep Column to include the RF strain, the Gene name and the Num. so each RF gets its own score X2_RF$OrfRep <- paste(X2_RF$OrfRep, X2_RF$Gene, X2_RF$Num., sep = "_") num_genes_RF <- length(unique(X2_RF$OrfRep)) # print(num_genes_RF) # Create the output data.frame containing columns for each RF strain InteractionScores_RF <- unique(X2_RF["OrfRep"]) # InteractionScores_RF$Gene <- unique(X2$Gene) InteractionScores_RF$Gene <- NA InteractionScores_RF$Raw_Shift_L <- NA InteractionScores_RF$Z_Shift_L <- NA InteractionScores_RF$lm_Score_L <- NA InteractionScores_RF$Z_lm_L <- NA InteractionScores_RF$R_Squared_L <- NA InteractionScores_RF$Sum_Z_Score_L <- NA InteractionScores_RF$Avg_Zscore_L <- NA InteractionScores_RF$Raw_Shift_K <- NA InteractionScores_RF$Z_Shift_K <- NA InteractionScores_RF$lm_Score_K <- NA InteractionScores_RF$Z_lm_K <- NA InteractionScores_RF$R_Squared_K <- NA InteractionScores_RF$Sum_Z_Score_K <- NA InteractionScores_RF$Avg_Zscore_K <- NA InteractionScores_RF$Raw_Shift_r <- NA InteractionScores_RF$Z_Shift_r <- NA InteractionScores_RF$lm_Score_r <- NA InteractionScores_RF$Z_lm_r <- NA InteractionScores_RF$R_Squared_r <- NA InteractionScores_RF$Sum_Z_Score_r <- NA InteractionScores_RF$Avg_Zscore_r <- NA InteractionScores_RF$Raw_Shift_AUC <- NA InteractionScores_RF$Z_Shift_AUC <- NA InteractionScores_RF$lm_Score_AUC <- NA InteractionScores_RF$Z_lm_AUC <- NA InteractionScores_RF$R_Squared_AUC <- NA InteractionScores_RF$Sum_Z_Score_AUC <- NA InteractionScores_RF$Avg_Zscore_AUC <- NA InteractionScores_RF$NG <- NA InteractionScores_RF$SM <- NA for (i in 1:num_genes_RF) { # Get each deletion strain ORF Gene_Sel <- unique(X2_RF$OrfRep)[i] # Extract only the current deletion strain and its data X_Gene_Sel <- X2_RF[X2_RF$OrfRep == Gene_Sel, ] X_stats_interaction <- ddply( X_Gene_Sel, c("OrfRep", "Gene", "Conc_Num", "Conc_Num_Factor"), summarise, N = (length(l)), mean_L = mean(l, na.rm = TRUE), median_L = median(l, na.rm = TRUE), sd_L = sd(l, na.rm = TRUE), se_L = sd_L / sqrt(N - 1), mean_K = mean(K, na.rm = TRUE), median_K = median(K, na.rm = TRUE), sd_K = sd(K, na.rm = TRUE), se_K = sd_K / sqrt(N - 1), mean_r = mean(r, na.rm = TRUE), median_r = median(r, na.rm = TRUE), sd_r = sd(r, na.rm = TRUE), se_r = sd_r / sqrt(N - 1), mean_AUC = mean(AUC, na.rm = TRUE), median_AUC = median(AUC, na.rm = TRUE), sd_AUC = sd(AUC, na.rm = TRUE), se_AUC = sd_AUC / sqrt(N - 1), NG = sum(NG, na.rm = TRUE), DB = sum(DB, na.rm = TRUE), SM = sum(SM, na.rm = TRUE) ) # Get shift vals # if L is 0, that means the no growth on no drug # if L is NA at 0, that means the spot was removed due to contamination # if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift # otherwise calculate shift at no drug conc if (is.na(X_stats_interaction$mean_L[1]) || X_stats_interaction$mean_L[1] == 0) { X_stats_interaction$Raw_Shift_L <- 0 X_stats_interaction$Raw_Shift_K <- 0 X_stats_interaction$Raw_Shift_r <- 0 X_stats_interaction$Raw_Shift_AUC <- 0 X_stats_interaction$Z_Shift_L <- 0 X_stats_interaction$Z_Shift_K <- 0 X_stats_interaction$Z_Shift_r <- 0 X_stats_interaction$Z_Shift_AUC <- 0 } else { X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1] / X_stats_BY_L$sd[1] X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1] / X_stats_BY_K$sd[1] X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1] / X_stats_BY_r$sd[1] X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1] / X_stats_BY_AUC$sd[1] } # Get WT vals X_stats_interaction$WT_l <- X_stats_BY_L$mean X_stats_interaction$WT_K <- X_stats_BY_K$mean X_stats_interaction$WT_r <- X_stats_BY_r$mean X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean # Get WT SD X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd # Only get scores if there's growth at no drug if (X_stats_interaction$mean_L[1] != 0 && !is.na(X_stats_interaction$mean_L[1])) { # Calculate expected values X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC # Calculate normalized delta values X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K X_stats_interaction$Delta_r <- X_stats_interaction$mean_r - X_stats_interaction$Exp_r X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC # Disregard shift for no growth values in Z score calculation if (sum(X_stats_interaction$NG, na.rm = TRUE) > 0) { X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_l X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_K X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_r X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_AUC } # Disregard shift for set to max values in Z score calculation if (sum(X_stats_interaction$SM, na.rm = TRUE) > 0) { X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_l # Only calculate the L value without shift since L is the only adjusted value # X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_K <- # X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_K # X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_r <- # X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_r # X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_AUC <- # X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_AU } # Calculate Z score at each concentration X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L) / (X_stats_interaction$WT_sd_l) X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K) / (X_stats_interaction$WT_sd_K) X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r) / (X_stats_interaction$WT_sd_r) X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC) / (X_stats_interaction$WT_sd_AUC) # Get linear model gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor, data = X_stats_interaction) gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor, data = X_stats_interaction) gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor, data = X_stats_interaction) gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor, data = X_stats_interaction) # Get interaction score calculated by linear model and R-squared value for the fit gene_interaction_L <- MAX_CONC * (gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] r_squared_l <- summary(gene_lm_L)$r.squared gene_interaction_K <- MAX_CONC * (gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] r_squared_K <- summary(gene_lm_K)$r.squared gene_interaction_r <- MAX_CONC * (gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] r_squared_r <- summary(gene_lm_r)$r.squared gene_interaction_AUC <- MAX_CONC * (gene_lm_r$coefficients[2]) + gene_lm_AUC$coefficients[1] r_squared_AUC <- summary(gene_lm_AUC)$r.squared # Get total of non removed values Num_non_Removed_Conc <- total_conc_nums - sum(X_stats_interaction$DB, na.rm = TRUE) - 1 # Report the scores InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L, na.rm = TRUE) InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L, na.rm = TRUE) / (Num_non_Removed_Conc) InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K, na.rm = TRUE) InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K, na.rm = TRUE) / (Num_non_Removed_Conc) InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r, na.rm = TRUE) InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r, na.rm = TRUE) / (total_conc_nums - 1) InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC, na.rm = TRUE) InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC, na.rm = TRUE) / (total_conc_nums - 1) } if (X_stats_interaction$mean_L[1] == 0 || is.na(X_stats_interaction$mean_L[1])) { # Calculate expected values X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC # Calculate normalized delta values X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K X_stats_interaction$Delta_r <- X_stats_interaction$mean_r - X_stats_interaction$Exp_r X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC # Disregard shift for missing values in Z score calculation if (sum(X_stats_interaction$NG, na.rm = TRUE) > 0) { X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_l X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_K X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_r X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_AUC } # Disregard shift for set to max values in Z score calculation if (sum(X_stats_interaction$SM, na.rm = TRUE) > 0) { X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_l # Only calculate the L value without shift since L is the only adjusted value # X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_K <- # X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_K # X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_r <- # X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_r # X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_AUC <- # X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_AUC } # Calculate Z score at each concentration X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L) / (X_stats_interaction$WT_sd_l) X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K) / (X_stats_interaction$WT_sd_K) X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r) / (X_stats_interaction$WT_sd_r) X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC) / (X_stats_interaction$WT_sd_AUC) # NA values for the next part since there's an NA or 0 at the no drug. gene_lm_L <- NA gene_lm_K <- NA gene_lm_r <- NA gene_lm_AUC <- NA gene_interaction_L <- NA r_squared_l <- NA gene_interaction_K <- NA r_squared_K <- NA gene_interaction_r <- NA r_squared_r <- NA gene_interaction_AUC <- NA r_squared_AUC <- NA X_stats_interaction$Raw_Shift_L <- NA X_stats_interaction$Raw_Shift_K <- NA X_stats_interaction$Raw_Shift_r <- NA X_stats_interaction$Raw_Shift_AUC <- NA X_stats_interaction$Z_Shift_L <- NA X_stats_interaction$Z_Shift_K <- NA X_stats_interaction$Z_Shift_r <- NA X_stats_interaction$Z_Shift_AUC <- NA InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA } if (i == 1) { X_stats_interaction_ALL_RF <- X_stats_interaction } if (i > 1) { X_stats_interaction_ALL_RF <- rbind(X_stats_interaction_ALL_RF, X_stats_interaction) } InteractionScores_RF$NG[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG, na.rm = TRUE) InteractionScores_RF$DB[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB, na.rm = TRUE) InteractionScores_RF$SM[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM, na.rm = TRUE) # X_stats_L_int_temp <- rbind(X_stats_L_int_temp, X_stats_L_int) } print("Pass RF Calculation loop") lm_sd_L <- sd(InteractionScores_RF$lm_Score_L, na.rm = TRUE) lm_sd_K <- sd(InteractionScores_RF$lm_Score_K, na.rm = TRUE) lm_sd_r <- sd(InteractionScores_RF$lm_Score_r, na.rm = TRUE) lm_sd_AUC <- sd(InteractionScores_RF$lm_Score_AUC, na.rm = TRUE) lm_mean_L <- mean(InteractionScores_RF$lm_Score_L, na.rm = TRUE) lm_mean_K <- mean(InteractionScores_RF$lm_Score_K, na.rm = TRUE) lm_mean_r <- mean(InteractionScores_RF$lm_Score_r, na.rm = TRUE) lm_mean_AUC <- mean(InteractionScores_RF$lm_Score_AUC, na.rm = TRUE) print(paste("Mean RF linear regression score L", lm_mean_L)) InteractionScores_RF$Z_lm_L <- (InteractionScores_RF$lm_Score_L - lm_mean_L) / (lm_sd_L) InteractionScores_RF$Z_lm_K <- (InteractionScores_RF$lm_Score_K - lm_mean_K) / (lm_sd_K) InteractionScores_RF$Z_lm_r <- (InteractionScores_RF$lm_Score_r - lm_mean_r) / (lm_sd_r) InteractionScores_RF$Z_lm_AUC <- (InteractionScores_RF$lm_Score_AUC - lm_mean_AUC) / (lm_sd_AUC) InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$Z_lm_L, decreasing = TRUE), ] InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$NG, decreasing = TRUE), ] write.csv(InteractionScores_RF, file.path(outDir, "RF_ZScores_Interaction.csv"), row.names = FALSE) for (i in 1:num_genes_RF) { Gene_Sel <- unique(InteractionScores_RF$OrfRep)[i] X_ZCalculations <- X_stats_interaction_ALL_RF[X_stats_interaction_ALL_RF$OrfRep == Gene_Sel, ] X_Int_Scores <- InteractionScores_RF[InteractionScores_RF$OrfRep == Gene_Sel, ] p_rf_l[[i]] <- ggplot(X_ZCalculations, aes(Conc_Num_Factor, Delta_L)) + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + coord_cartesian(ylim = c(-65, 65)) + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_l), ymax = 0 + (2 * WT_sd_l)), alpha = 0.3) + ggtitle(paste(X_ZCalculations$OrfRep[1], X_ZCalculations$Gene[1], sep = " ")) + annotate("text", x = 1, y = 45, label = paste("ZShift =", round(X_Int_Scores$Z_Shift_L, 2))) + scale_color_discrete(guide = FALSE) + # annotate("text", x = 1, y = 35, label = paste("Avg Zscore =", round(X_Int_Scores$Avg_Zscore_L, 2))) + annotate("text", x = 1, y = 25, label = paste("lm Zscore =", round(X_Int_Scores$Z_lm_L, 2))) + annotate("text", x = 1, y = -25, label = paste("NG =", X_Int_Scores$NG)) + annotate("text", x = 1, y = -35, label = paste("DB =", X_Int_Scores$DB)) + annotate("text", x = 1, y = -45, label = paste("SM =", X_Int_Scores$SM)) + scale_x_continuous( name = unique(X$Drug[1]), breaks = unique(X_ZCalculations$Conc_Num_Factor), labels = unique(as.character(X_ZCalculations$Conc_Num)) ) + scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) + theme_publication() p_rf_K[[i]] <- ggplot( X_ZCalculations, aes(Conc_Num_Factor, Delta_K)) + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + coord_cartesian(ylim = c(-65, 65)) + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_K), ymax = 0 + (2 * WT_sd_K)), alpha = 0.3) + ggtitle(paste(X_ZCalculations$OrfRep[1], X_ZCalculations$Gene[1], sep = " ")) + annotate("text", x = 1, y = 45, label = paste("ZShift =", round(X_Int_Scores$Z_Shift_K, 2))) + scale_color_discrete(guide = FALSE) + # annotate("text", x = 1, y = 35, label = paste("Avg ZScore =", round(X_Int_Scores$Avg_Zscore_K, 2))) + annotate("text", x = 1, y = 25, label = paste("lm ZScore =", round(X_Int_Scores$Z_lm_L, 2))) + annotate("text", x = 1, y = -25, label = paste("NG =", X_Int_Scores$NG)) + annotate("text", x = 1, y = -35, label = paste("DB =", X_Int_Scores$DB)) + annotate("text", x = 1, y = -45, label = paste("SM =", X_Int_Scores$SM)) + scale_x_continuous( name = unique(X$Drug[1]), breaks = unique(X_ZCalculations$Conc_Num_Factor), labels = unique(as.character(X_ZCalculations$Conc_Num)) ) + scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) + theme_publication() p_rf_r[[i]] <- ggplot( X_ZCalculations, aes(Conc_Num_Factor, Delta_r)) + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + coord_cartesian(ylim = c(-0.65, 0.65)) + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_r), ymax = 0 + (2 * WT_sd_r)), alpha = 0.3) + ggtitle(paste(X_ZCalculations$OrfRep[1], X_ZCalculations$Gene[1], sep = " ")) + annotate("text", x = 1, y = 0.45, label = paste("ZShift =", round(X_Int_Scores$Z_Shift_r, 2))) + scale_color_discrete(guide = FALSE) + # annotate("text", x = 1, y = 0.35, label = paste("Avg ZScore =", round(X_Int_Scores$Avg_Zscore_r, 2))) + annotate("text", x = 1, y = 0.25, label = paste("lm ZScore =", round(X_Int_Scores$Z_lm_r, 2))) + annotate("text", x = 1, y = -0.25, label = paste("NG =", X_Int_Scores$NG)) + annotate("text", x = 1, y = -0.35, label = paste("DB =", X_Int_Scores$DB)) + annotate("text", x = 1, y = -0.45, label = paste("SM =", X_Int_Scores$SM)) + scale_x_continuous( name = unique(X$Drug[1]), breaks = unique(X_ZCalculations$Conc_Num_Factor), labels = unique(as.character(X_ZCalculations$Conc_Num)) ) + scale_y_continuous(breaks = c(-0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6)) + theme_publication() p_rf_AUC[[i]] <- ggplot( X_ZCalculations, aes(Conc_Num_Factor, Delta_AUC)) + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + coord_cartesian(ylim = c(-6500, 6500)) + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_AUC), ymax = 0 + (2 * WT_sd_AUC)), alpha = 0.3) + ggtitle(paste(X_ZCalculations$OrfRep[1], X_ZCalculations$Gene[1], sep = " ")) + annotate("text", x = 1, y = 4500, label = paste("ZShift =", round(X_Int_Scores$Z_Shift_AUC, 2))) + scale_color_discrete(guide = FALSE) + # annotate("text", x = 1, y = 3500, label = paste("Avg ZScore =", round(X_Int_Scores$Avg_Zscore_AUC, 2))) + annotate("text", x = 1, y = 2500, label = paste("lm ZScore =", round(X_Int_Scores$Z_lm_AUC, 2))) + annotate("text", x = 1, y = -2500, label = paste("NG =", X_Int_Scores$NG)) + annotate("text", x = 1, y = -3500, label = paste("DB =", X_Int_Scores$DB)) + annotate("text", x = 1, y = -4500, label = paste("SM =", X_Int_Scores$SM)) + scale_x_continuous( name = unique(X$Drug[1]), breaks = unique(X_ZCalculations$Conc_Num_Factor), labels = unique(as.character(X_ZCalculations$Conc_Num)) ) + scale_y_continuous(breaks = c(-6000, -5000, -4000, -3000, -2000, -1000, 0, 1000, 2000, 3000, 4000, 5000, 6000)) + theme_publication() if (i == 1) { X_stats_interaction_ALL_RF_final <- X_ZCalculations } if (i > 1) { X_stats_interaction_ALL_RF_final <- rbind(X_stats_interaction_ALL_RF_final, X_ZCalculations) } } print("Pass RF ggplot loop") write.csv(X_stats_interaction_ALL_RF_final, file.path(outDir, "RF_ZScore_Calculations.csv"), row.names = FALSE) # Part 5 - Get Zscores for Gene deletion strains # Get total number of genes for the next loop num_genes <- length(unique(X2$OrfRep)) # print(num_genes) # Create the output data.frame containing columns for each deletion strain InteractionScores <- unique(X2["OrfRep"]) # InteractionScores$Gene <- unique(X2$Gene) InteractionScores$Gene <- NA InteractionScores$Raw_Shift_L <- NA InteractionScores$Z_Shift_L <- NA InteractionScores$lm_Score_L <- NA InteractionScores$Z_lm_L <- NA InteractionScores$R_Squared_L <- NA InteractionScores$Sum_Z_Score_L <- NA InteractionScores$Avg_Zscore_L <- NA InteractionScores$Raw_Shift_K <- NA InteractionScores$Z_Shift_K <- NA InteractionScores$lm_Score_K <- NA InteractionScores$Z_lm_K <- NA InteractionScores$R_Squared_K <- NA InteractionScores$Sum_Z_Score_K <- NA InteractionScores$Avg_Zscore_K <- NA InteractionScores$Raw_Shift_r <- NA InteractionScores$Z_Shift_r <- NA InteractionScores$lm_Score_r <- NA InteractionScores$Z_lm_r <- NA InteractionScores$R_Squared_r <- NA InteractionScores$Sum_Z_Score_r <- NA InteractionScores$Avg_Zscore_r <- NA InteractionScores$Raw_Shift_AUC <- NA InteractionScores$Z_Shift_AUC <- NA InteractionScores$lm_Score_AUC <- NA InteractionScores$Z_lm_AUC <- NA InteractionScores$R_Squared_AUC <- NA InteractionScores$Sum_Z_Score_AUC <- NA InteractionScores$Avg_Zscore_AUC <- NA InteractionScores$NG <- NA InteractionScores$DB <- NA InteractionScores$SM <- NA for (i in 1:num_genes) { # Get each deletion strain ORF Gene_Sel <- unique(X2$OrfRep)[i] # Extract only the current deletion strain and its data X_Gene_Sel <- X2[X2$OrfRep == Gene_Sel, ] X_stats_interaction <- ddply( X_Gene_Sel, c("OrfRep", "Gene", "Conc_Num", "Conc_Num_Factor"), summarise, N = (length(l)), mean_L = mean(l, na.rm = TRUE), median_L = median(l, na.rm = TRUE), sd_L = sd(l, na.rm = TRUE), se_L = sd_L / sqrt(N - 1), mean_K = mean(K, na.rm = TRUE), median_K = median(K, na.rm = TRUE), sd_K = sd(K, na.rm = TRUE), se_K = sd_K / sqrt(N - 1), mean_r = mean(r, na.rm = TRUE), median_r = median(r, na.rm = TRUE), sd_r = sd(r, na.rm = TRUE), se_r = sd_r / sqrt(N - 1), mean_AUC = mean(AUC, na.rm = TRUE), median_AUC = median(AUC, na.rm = TRUE), sd_AUC = sd(AUC, na.rm = TRUE), se_AUC = sd_AUC / sqrt(N - 1), NG = sum(NG, na.rm = TRUE), DB = sum(DB, na.rm = TRUE), SM = sum(SM, na.rm = TRUE) ) # Get shift vals # if L is 0, that means the no growth on no drug # if L is NA at 0, that means the spot was removed due to contamination # if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift # otherwise calculate shift at no drug conc if (is.na(X_stats_interaction$mean_L[1]) || X_stats_interaction$mean_L[1] == 0) { X_stats_interaction$Raw_Shift_L <- 0 X_stats_interaction$Raw_Shift_K <- 0 X_stats_interaction$Raw_Shift_r <- 0 X_stats_interaction$Raw_Shift_AUC <- 0 X_stats_interaction$Z_Shift_L <- 0 X_stats_interaction$Z_Shift_K <- 0 X_stats_interaction$Z_Shift_r <- 0 X_stats_interaction$Z_Shift_AUC <- 0 } else { X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1] / X_stats_BY_L$sd[1] X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1] / X_stats_BY_K$sd[1] X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1] / X_stats_BY_r$sd[1] X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1] / X_stats_BY_AUC$sd[1] } # Get WT vals X_stats_interaction$WT_l <- X_stats_BY_L$mean X_stats_interaction$WT_K <- X_stats_BY_K$mean X_stats_interaction$WT_r <- X_stats_BY_r$mean X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean # Get WT SD X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd # Only get scores if there's growth at no drug if (X_stats_interaction$mean_L[1] != 0 && !is.na(X_stats_interaction$mean_L[1])) { # Calculate expected values X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC # Calculate normalized delta values X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K X_stats_interaction$Delta_r <- X_stats_interaction$mean_r - X_stats_interaction$Exp_r X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC # Disregard shift for no growth values in Z score calculation if (sum(X_stats_interaction$NG, na.rm = TRUE) > 0) { X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_l X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_K X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_r X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_AUC } # Disregard shift for set to max values in Z score calculation if (sum(X_stats_interaction$SM, na.rm = TRUE) > 0) { X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_l # Only calculate the L value without shift since L is the only adjusted value # X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_K <- # X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_K # X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_r <- # X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_r # X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_AUC <- # X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_AUC } # Calculate Z score at each concentration X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L) / (X_stats_interaction$WT_sd_l) X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K) / (X_stats_interaction$WT_sd_K) X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r) / (X_stats_interaction$WT_sd_r) X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC) / (X_stats_interaction$WT_sd_AUC) # Get linear model gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor, data = X_stats_interaction) gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor, data = X_stats_interaction) gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor, data = X_stats_interaction) gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor, data = X_stats_interaction) # Get interaction score calculated by linear model and R-squared value for the fit gene_interaction_L <- MAX_CONC * (gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] r_squared_l <- summary(gene_lm_L)$r.squared gene_interaction_K <- MAX_CONC * (gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] r_squared_K <- summary(gene_lm_K)$r.squared gene_interaction_r <- MAX_CONC * (gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] r_squared_r <- summary(gene_lm_r)$r.squared gene_interaction_AUC <- MAX_CONC * (gene_lm_r$coefficients[2]) + gene_lm_AUC$coefficients[1] r_squared_AUC <- summary(gene_lm_AUC)$r.squared # Get total of non removed values Num_non_Removed_Conc <- total_conc_nums - sum(X_stats_interaction$DB, na.rm = TRUE) - 1 # Report the scores InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1]) InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1]) InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_L InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_L - lm_mean_L) / lm_sd_L InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L, na.rm = TRUE) InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L, na.rm = TRUE) / (Num_non_Removed_Conc) InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_K InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_K - lm_mean_K) / lm_sd_K InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K, na.rm = TRUE) InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K, na.rm = TRUE) / (Num_non_Removed_Conc) InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_r InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_r - lm_mean_r) / lm_sd_r InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r, na.rm = TRUE) InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r, na.rm = TRUE) / (total_conc_nums - 1) InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_AUC InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_AUC - lm_mean_AUC) / lm_sd_AUC InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC, na.rm = TRUE) InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC, na.rm = TRUE) / (total_conc_nums - 1) } if (X_stats_interaction$mean_L[1] == 0 || is.na(X_stats_interaction$mean_L[1])) { # Calculate expected values X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC # Calculate normalized delta values X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K X_stats_interaction$Delta_r <- X_stats_interaction$mean_r - X_stats_interaction$Exp_r X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC # Disregard shift for missing values in Z score calculatiom if (sum(X_stats_interaction$NG, na.rm = TRUE) > 0) { X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_l X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_K X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_r X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_AUC } # Disregard shift for set to max values in Z score calculation if (sum(X_stats_interaction$SM, na.rm = TRUE) > 0) { X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_l # Only calculate the L value without shift since L is the only adjusted value # X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_K <- # X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_K # X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_r <- # X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_r # X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_AUC <- # X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_AUC } # Calculate Z score at each concentration X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L) / (X_stats_interaction$WT_sd_l) X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K) / (X_stats_interaction$WT_sd_K) X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r) / (X_stats_interaction$WT_sd_r) X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC) / (X_stats_interaction$WT_sd_AUC) # NA values for the next part since there's an NA or 0 at the no drug. gene_lm_L <- NA gene_lm_K <- NA gene_lm_r <- NA gene_interaction_L <- NA r_squared_l <- NA gene_interaction_K <- NA r_squared_K <- NA gene_interaction_r <- NA r_squared_r <- NA X_stats_interaction$Raw_Shift_L <- NA X_stats_interaction$Raw_Shift_K <- NA X_stats_interaction$Raw_Shift_r <- NA X_stats_interaction$Raw_Shift_AUC <- NA X_stats_interaction$Z_Shift_L <- NA X_stats_interaction$Z_Shift_K <- NA X_stats_interaction$Z_Shift_r <- NA X_stats_interaction$Z_Shift_AUC <- NA InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1]) InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1]) InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA } if (i == 1) { X_stats_interaction_ALL <- X_stats_interaction } if (i > 1) { X_stats_interaction_ALL <- rbind(X_stats_interaction_ALL, X_stats_interaction) } InteractionScores$NG[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG, na.rm = TRUE) InteractionScores$DB[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB, na.rm = TRUE) InteractionScores$SM[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM, na.rm = TRUE) # X_stats_L_int_temp <- rbind(X_stats_L_int_temp, X_stats_L_int) } print("Pass Int Calculation loop") InteractionScores <- InteractionScores[order(InteractionScores$Z_lm_L, decreasing = TRUE), ] InteractionScores <- InteractionScores[order(InteractionScores$NG, decreasing = TRUE), ] df_order_by_OrfRep <- unique(InteractionScores$OrfRep) # X_stats_interaction_ALL <- X_stats_interaction_ALL[order(X_stats_interaction_ALL$NG, decreasing = TRUE), ] write.csv(InteractionScores, file.path(outDir, "ZScores_Interaction.csv"), row.names = FALSE) InteractionScores_deletion_enhancers_L <- InteractionScores[InteractionScores$Avg_Zscore_L >= 2, ] InteractionScores_deletion_enhancers_K <- InteractionScores[InteractionScores$Avg_Zscore_K <= -2, ] InteractionScores_deletion_suppressors_L <- InteractionScores[InteractionScores$Avg_Zscore_L <= -2, ] InteractionScores_deletion_suppressors_K <- InteractionScores[InteractionScores$Avg_Zscore_K >= 2, ] InteractionScores_deletion_enhancers_and_Suppressors_L <- InteractionScores[InteractionScores$Avg_Zscore_L >= 2 | InteractionScores$Avg_Zscore_L <= -2, ] InteractionScores_deletion_enhancers_and_Suppressors_K <- InteractionScores[InteractionScores$Avg_Zscore_K >= 2 | InteractionScores$Avg_Zscore_K <= -2, ] InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- InteractionScores[InteractionScores$Z_lm_L >= 2 & InteractionScores$Avg_Zscore_L <= -2, ] InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- InteractionScores[InteractionScores$Z_lm_L <= -2 & InteractionScores$Avg_Zscore_L >= 2, ] InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- InteractionScores[InteractionScores$Z_lm_K <= -2 & InteractionScores$Avg_Zscore_K >= 2, ] InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- InteractionScores[InteractionScores$Z_lm_K >= 2 & InteractionScores$Avg_Zscore_K <= -2, ] InteractionScores_deletion_enhancers_L <- InteractionScores_deletion_enhancers_L[ !is.na(InteractionScores_deletion_enhancers_L$OrfRep), ] InteractionScores_deletion_enhancers_K <- InteractionScores_deletion_enhancers_K[ !is.na(InteractionScores_deletion_enhancers_K$OrfRep), ] InteractionScores_deletion_suppressors_L <- InteractionScores_deletion_suppressors_L[ !is.na(InteractionScores_deletion_suppressors_L$OrfRep), ] InteractionScores_deletion_suppressors_K <- InteractionScores_deletion_suppressors_K[ !is.na(InteractionScores_deletion_suppressors_K$OrfRep), ] InteractionScores_deletion_enhancers_and_Suppressors_L <- InteractionScores_deletion_enhancers_and_Suppressors_L[ !is.na(InteractionScores_deletion_enhancers_and_Suppressors_L$OrfRep), ] InteractionScores_deletion_enhancers_and_Suppressors_K <- InteractionScores_deletion_enhancers_and_Suppressors_K[ !is.na(InteractionScores_deletion_enhancers_and_Suppressors_K$OrfRep), ] InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L[ !is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L$OrfRep), ] InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L[ !is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L$OrfRep), ] InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K[ !is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K$OrfRep), ] InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K[ !is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K$OrfRep), ] write.csv(InteractionScores_deletion_enhancers_L, file.path(outDir, "ZScores_Interaction_DeletionEnhancers_L.csv"), row.names = FALSE) write.csv(InteractionScores_deletion_enhancers_K, file.path(outDir, "ZScores_Interaction_DeletionEnhancers_K.csv"), row.names = FALSE) write.csv(InteractionScores_deletion_suppressors_L, file.path(outDir, "ZScores_Interaction_DeletionSuppressors_L.csv"), row.names = FALSE) write.csv(InteractionScores_deletion_suppressors_K, file.path(outDir, "ZScores_Interaction_DeletionSuppressors_K.csv"), row.names = FALSE) write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L, file.path(outDir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv"), row.names = FALSE) write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K, file.path(outDir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv"), row.names = FALSE) write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L, file.path(outDir, "ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv"), row.names = FALSE) write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L, file.path(outDir, "ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv"), row.names = FALSE) write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K, file.path(outDir, "ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv"), row.names = FALSE) write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K, file.path(outDir, "ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv"), row.names = FALSE) # Get enhancers and suppressors for linear regression InteractionScores_deletion_enhancers_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2, ] InteractionScores_deletion_enhancers_K_lm <- InteractionScores[InteractionScores$Z_lm_K <= -2, ] InteractionScores_deletion_suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L <= -2, ] InteractionScores_deletion_suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2, ] InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2 | InteractionScores$Z_lm_L <= -2, ] InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2 | InteractionScores$Z_lm_K <= -2, ] InteractionScores_deletion_enhancers_L_lm <- InteractionScores_deletion_enhancers_L_lm[ !is.na(InteractionScores_deletion_enhancers_L_lm$OrfRep), ] InteractionScores_deletion_enhancers_K_lm <- InteractionScores_deletion_enhancers_K_lm[ !is.na(InteractionScores_deletion_enhancers_K_lm$OrfRep), ] InteractionScores_deletion_suppressors_L_lm <- InteractionScores_deletion_suppressors_L_lm[ !is.na(InteractionScores_deletion_suppressors_L_lm$OrfRep), ] InteractionScores_deletion_suppressors_K_lm <- InteractionScores_deletion_suppressors_K_lm[ !is.na(InteractionScores_deletion_suppressors_K_lm$OrfRep), ] InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores_deletion_enhancers_and_Suppressors_L_lm[ !is.na(InteractionScores_deletion_enhancers_and_Suppressors_L_lm$OrfRep), ] InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores_deletion_enhancers_and_Suppressors_K_lm[ !is.na(InteractionScores_deletion_enhancers_and_Suppressors_K_lm$OrfRep), ] write.csv(InteractionScores_deletion_enhancers_L_lm, file.path(outDir, "ZScores_Interaction_DeletionEnhancers_L_lm.csv"), row.names = FALSE) write.csv(InteractionScores_deletion_enhancers_K_lm, file.path(outDir, "ZScores_Interaction_DeletionEnhancers_K_lm.csv"), row.names = FALSE) write.csv(InteractionScores_deletion_suppressors_L_lm, file.path(outDir, "ZScores_Interaction_DeletionSuppressors_L_lm.csv"), row.names = FALSE) write.csv(InteractionScores_deletion_suppressors_K_lm, file.path(outDir, "ZScores_Interaction_DeletionSuppressors_K_lm.csv"), row.names = FALSE) write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L_lm, file.path(outDir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv"), row.names = FALSE) write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K_lm, file.path(outDir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv"), row.names = FALSE) # write.csv(Labels, studyInfo, row.names = FALSE) # write.table(Labels, file.path("../Code/StudyInfo.txt"), sep = "\t", row.names = FALSE) for (i in 1:num_genes) { Gene_Sel <- unique(InteractionScores$OrfRep)[i] X_ZCalculations <- X_stats_interaction_ALL[X_stats_interaction_ALL$OrfRep == Gene_Sel, ] X_Int_Scores <- InteractionScores[InteractionScores$OrfRep == Gene_Sel, ] p_l[[i]] <- ggplot(X_ZCalculations, aes(Conc_Num_Factor, Delta_L)) + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + coord_cartesian(ylim = c(-65, 65)) + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_l), ymax = 0 + (2 * WT_sd_l)), alpha = 0.3) + ggtitle(paste(X_ZCalculations$OrfRep[1], X_ZCalculations$Gene[1], sep = " ")) + annotate("text", x = 1, y = 45, label = paste("ZShift =", round(X_Int_Scores$Z_Shift_L, 2))) + scale_color_discrete(guide = FALSE) + # annotate("text", x = 1, y = 35, label = paste("Avg ZScore =", round(X_Int_Scores$Avg_Zscore_L, 2))) + annotate("text", x = 1, y = 25, label = paste("Z lm Score =", round(X_Int_Scores$Z_lm_L, 2))) + annotate("text", x = 1, y = -25, label = paste("NG =", X_Int_Scores$NG)) + annotate("text", x = 1, y = -35, label = paste("DB =", X_Int_Scores$DB)) + annotate("text", x = 1, y = -45, label = paste("SM =", X_Int_Scores$SM)) + scale_x_continuous( name = unique(X$Drug[1]), breaks = unique(X_ZCalculations$Conc_Num_Factor), labels = unique(as.character(X_ZCalculations$Conc_Num))) + scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) + theme_publication() p_K[[i]] <- ggplot(X_ZCalculations, aes(Conc_Num_Factor, Delta_K)) + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + coord_cartesian(ylim = c(-65, 65)) + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_K), ymax = 0 + (2 * WT_sd_K)), alpha = 0.3) + ggtitle(paste(X_ZCalculations$OrfRep[1], X_ZCalculations$Gene[1], sep = " ")) + annotate("text", x = 1, y = 45, label = paste("ZShift =", round(X_Int_Scores$Z_Shift_K, 2))) + scale_color_discrete(guide = FALSE) + # annotate("text", x = 1, y = 35, label = paste("Avg ZScore =", round(X_Int_Scores$Avg_Zscore_K, 2))) + annotate("text", x = 1, y = 25, label = paste("Z lm Score =", round(X_Int_Scores$Z_lm_K, 2))) + annotate("text", x = 1, y = -25, label = paste("NG =", X_Int_Scores$NG)) + annotate("text", x = 1, y = -35, label = paste("DB =", X_Int_Scores$DB)) + annotate("text", x = 1, y = -45, label = paste("SM =", X_Int_Scores$SM)) + scale_x_continuous( name = unique(X$Drug[1]), breaks = unique(X_ZCalculations$Conc_Num_Factor), labels = unique(as.character(X_ZCalculations$Conc_Num))) + scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) + theme_publication() p_r[[i]] <- ggplot(X_ZCalculations, aes(Conc_Num_Factor, Delta_r)) + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + coord_cartesian(ylim = c(-0.65, 0.65)) + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_r), ymax = 0 + (2 * WT_sd_r)), alpha = 0.3) + ggtitle(paste(X_ZCalculations$OrfRep[1], X_ZCalculations$Gene[1], sep = " ")) + annotate("text", x = 1, y = 0.45, label = paste("ZShift =", round(X_Int_Scores$Z_Shift_r, 2))) + scale_color_discrete(guide = FALSE) + # annotate("text", x = 1, y = 0.35, label = paste("Avg ZScore =", round(X_Int_Scores$Avg_Zscore_r, 2))) + annotate("text", x = 1, y = 0.25, label = paste("Z lm Score =", round(X_Int_Scores$Z_lm_r, 2))) + annotate("text", x = 1, y = -0.25, label = paste("NG =", X_Int_Scores$NG)) + annotate("text", x = 1, y = -0.35, label = paste("DB =", X_Int_Scores$DB)) + annotate("text", x = 1, y = -0.45, label = paste("SM =", X_Int_Scores$SM)) + scale_x_continuous( name = unique(X$Drug[1]), breaks = unique(X_ZCalculations$Conc_Num_Factor), labels = unique(as.character(X_ZCalculations$Conc_Num))) + scale_y_continuous(breaks = c(-0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6)) + theme_publication() p_AUC[[i]] <- ggplot(X_ZCalculations, aes(Conc_Num_Factor, Delta_AUC)) + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + coord_cartesian(ylim = c(-6500, 6500)) + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_AUC), ymax = 0 + (2 * WT_sd_AUC)), alpha = 0.3) + ggtitle(paste(X_ZCalculations$OrfRep[1], X_ZCalculations$Gene[1], sep = " ")) + annotate("text", x = 1, y = 4500, label = paste("ZShift =", round(X_Int_Scores$Z_Shift_AUC, 2))) + scale_color_discrete(guide = FALSE) + # annotate("text", x = 1, y = 3500, label = paste("Avg ZScore =", round(X_Int_Scores$Avg_Zscore_AUC, 2))) + annotate("text", x = 1, y = 2500, label = paste("Z lm Score =", round(X_Int_Scores$Z_lm_AUC, 2))) + annotate("text", x = 1, y = -2500, label = paste("NG =", X_Int_Scores$NG)) + annotate("text", x = 1, y = -3500, label = paste("DB =", X_Int_Scores$DB)) + annotate("text", x = 1, y = -4500, label = paste("SM =", X_Int_Scores$SM)) + scale_x_continuous( name = unique(X$Drug[1]), breaks = unique(X_ZCalculations$Conc_Num_Factor), labels = unique(as.character(X_ZCalculations$Conc_Num))) + scale_y_continuous(breaks = c(-6000, -5000, -4000, -3000, -2000, -1000, 0, 1000, 2000, 3000, 4000, 5000, 6000)) + theme_publication() if (i == 1) { X_stats_interaction_ALL_final <- X_ZCalculations } if (i > 1) { X_stats_interaction_ALL_final <- rbind(X_stats_interaction_ALL_final, X_ZCalculations) } } print("Pass Int ggplot loop") write.csv(X_stats_interaction_ALL_final, file.path(outDir, "ZScore_Calculations.csv"), row.names = FALSE) Blank <- ggplot(X2_RF) + geom_blank() pdf(file.path(outDir, "InteractionPlots.pdf"), width = 16, height = 16, onefile = TRUE) X_stats_X2_RF <- ddply( X2_RF, c("Conc_Num", "Conc_Num_Factor"), summarise, mean_L = mean(l, na.rm = TRUE), median_L = median(l, na.rm = TRUE), max_L = max(l, na.rm = TRUE), min_L = min(l, na.rm = TRUE), sd_L = sd(l, na.rm = TRUE), mean_K = mean(K, na.rm = TRUE), median_K = median(K, na.rm = TRUE), max_K = max(K, na.rm = TRUE), min_K = min(K, na.rm = TRUE), sd_K = sd(K, na.rm = TRUE), mean_r = mean(r, na.rm = TRUE), median_r = median(r, na.rm = TRUE), max_r = max(r, na.rm = TRUE), min_r = min(r, na.rm = TRUE), sd_r = sd(r, na.rm = TRUE), mean_AUC = mean(AUC, na.rm = TRUE), median_AUC = median(AUC, na.rm = TRUE), max_AUC = max(AUC, na.rm = TRUE), min_AUC = min(AUC, na.rm = TRUE), sd_AUC = sd(AUC, na.rm = TRUE), NG = sum(NG, na.rm = TRUE), DB = sum(DB, na.rm = TRUE), SM = sum(SM, na.rm = TRUE) ) L_Stats <- ggplot(X2_RF, aes(Conc_Num_Factor, l)) + geom_point(position = "jitter", size = 1) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar", color = "red") + stat_summary(fun = mean, geom = "point", color = "red") + scale_x_continuous( name = unique(X$Drug[1]), breaks = unique(X2_RF$Conc_Num_Factor), labels = as.character(unique(X2_RF$Conc_Num)) ) + ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim = c(0, 160)) + annotate("text", x = -0.25, y = 10, label = "NG") + annotate("text", x = -0.25, y = 5, label = "DB") + annotate("text", x = -0.25, y = 0, label = "SM") + annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 10, label = X_stats_X2_RF$NG) + annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 5, label = X_stats_X2_RF$DB) + annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 0, label = X_stats_X2_RF$SM) + theme_publication() K_Stats <- ggplot(X2_RF, aes(Conc_Num_Factor, K)) + geom_point(position = "jitter", size = 1) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar", color = "red") + stat_summary(fun = mean, geom = "point", color = "red") + scale_x_continuous( name = unique(X$Drug[1]), breaks = unique(X2_RF$Conc_Num_Factor), labels = as.character(unique(X2_RF$Conc_Num)) ) + ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim = c(-20, 160)) + annotate("text", x = -0.25, y = -5, label = "NG") + annotate("text", x = -0.25, y = -12.5, label = "DB") + annotate("text", x = -0.25, y = -20, label = "SM") + annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = -5, label = X_stats_X2_RF$NG) + annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = -12.5, label = X_stats_X2_RF$DB) + annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = -20, label = X_stats_X2_RF$SM) + theme_publication() R_Stats <- ggplot(X2_RF, aes(Conc_Num_Factor, r)) + geom_point(position = "jitter", size = 1) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar", color = "red") + stat_summary(fun = mean, geom = "point", color = "red") + scale_x_continuous( name = unique(X$Drug[1]), breaks = unique(X2_RF$Conc_Num_Factor), labels = as.character(unique(X2_RF$Conc_Num)) ) + ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim = c(0, 1)) + annotate("text", x = -0.25, y = .9, label = "NG") + annotate("text", x = -0.25, y = .8, label = "DB") + annotate("text", x = -0.25, y = .7, label = "SM") + annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = .9, label = X_stats_X2_RF$NG) + annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = .8, label = X_stats_X2_RF$DB) + annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = .7, label = X_stats_X2_RF$SM) + theme_publication() AUC_Stats <- ggplot(X2_RF, aes(Conc_Num_Factor, AUC)) + geom_point(position = "jitter", size = 1) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar", color = "red") + stat_summary(fun = mean, geom = "point", color = "red") + scale_x_continuous( name = unique(X$Drug[1]), breaks = unique(X2_RF$Conc_Num_Factor), labels = as.character(unique(X2_RF$Conc_Num)) ) + ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim = c(0, 12500)) + annotate("text", x = -0.25, y = 11000, label = "NG") + annotate("text", x = -0.25, y = 10000, label = "DB") + annotate("text", x = -0.25, y = 9000, label = "SM") + annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 11000, label = X_stats_X2_RF$NG) + annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 10000, label = X_stats_X2_RF$DB) + annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 9000, label = X_stats_X2_RF$SM) + theme_publication() L_Stats_Box <- ggplot(X2_RF, aes(as.factor(Conc_Num_Factor), l)) + geom_boxplot() + scale_x_discrete( name = unique(X$Drug[1]), breaks = unique(X2_RF$Conc_Num_Factor), labels = as.character(unique(X2_RF$Conc_Num)) ) + ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim = c(0, 160)) + theme_publication() K_Stats_Box <- ggplot(X2_RF, aes(as.factor(Conc_Num_Factor), K)) + geom_boxplot() + scale_x_discrete( name = unique(X$Drug[1]), breaks = unique(X2_RF$Conc_Num_Factor), labels = as.character(unique(X2_RF$Conc_Num)) ) + ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim = c(0, 130)) + theme_publication() r_Stats_Box <- ggplot(X2_RF, aes(as.factor(Conc_Num_Factor), r)) + geom_boxplot() + scale_x_discrete( name = unique(X$Drug[1]), breaks = unique(X2_RF$Conc_Num_Factor), labels = as.character(unique(X2_RF$Conc_Num)) ) + ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim = c(0, 1)) + theme_publication() AUC_Stats_Box <- ggplot(X2_RF, aes(as.factor(Conc_Num_Factor), AUC)) + geom_boxplot() + scale_x_discrete( name = unique(X$Drug[1]), breaks = unique(X2_RF$Conc_Num_Factor), labels = as.character(unique(X2_RF$Conc_Num)) ) + ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim = c(0, 12500)) + theme_publication() grid.arrange(L_Stats, K_Stats, R_Stats, AUC_Stats, ncol = 2, nrow = 2) grid.arrange(L_Stats_Box, K_Stats_Box, r_Stats_Box, AUC_Stats_Box, ncol = 2, nrow = 2) # Plot the references # grid.arrange(p3, p3_K, p3_r, p4, p4_K, p4_r, p5, p5_K, p5_r, p6, p6_K, p6_r, ncol = 3, nrow = 4) # grid.arrange(p5, p5_K, p5_r, p6, p6_K, p6_r, ncol = 3, nrow = 2) # Loop for grid arrange 4x3 j <- rep(1, ((num_genes) / 3) - 1) for (n in 1:length(j)) { j[n + 1] <- n * 3 + 1 } # Loop for printing each plot num <- 0 for (m in 1:(round((num_genes) / 3) - 1)) { num <- j[m] grid.arrange(p_l[[num]], p_K[[num]], p_r[[num]], p_AUC[[num]], p_l[[num + 1]], p_K[[num + 1]], p_r[[num + 1]], p_AUC[[num + 1]], p_l[[num + 2]], p_K[[num + 2]], p_r[[num + 2]], p_AUC[[num + 2]], ncol = 4, nrow = 3) # grid.arrange(p_l[[364]], p_K[[364]], p_r[[364]], # p_l[[365]], p_K[[365]], p_r[[365]], p_l[[366]], p_K[[366]], p_r[[366]], ncol = 3, nrow = 3) # p1[[num + 3]], p_K[[num + 3]], p_r[[num + 3]], p1[[num + 4]], p_K[[num + 4]], p_r[[num + 4]] } if (num_genes != (num + 2)) { total_num <- num_genes - (num + 2) if (total_num == 5) { grid.arrange(p_l[[num + 3]], p_K[[num + 3]], p_r[[num + 3]], p_AUC[[num + 3]], p_l[[num + 4]], p_K[[num + 4]], p_r[[num + 4]], p_AUC[[num + 4]], p_l[[num + 5]], p_K[[num + 5]], p_r[[num + 5]], p_AUC[[num + 5]], ncol = 4, nrow = 3) grid.arrange(p_l[[num + 6]], p_K[[num + 6]], p_r[[num + 6]], p_AUC[[num + 6]], p_l[[num + 7]], p_K[[num + 7]], p_r[[num + 7]], p_AUC[[num + 7]], Blank, Blank, Blank, Blank, ncol = 4, nrow = 3) } if (total_num == 4) { grid.arrange(p_l[[num + 3]], p_K[[num + 3]], p_r[[num + 3]], p_AUC[[num + 3]], p_l[[num + 4]], p_K[[num + 4]], p_r[[num + 4]], p_AUC[[num + 4]], p_l[[num + 5]], p_K[[num + 5]], p_r[[num + 5]], p_AUC[[num + 5]], ncol = 4, nrow = 3) grid.arrange(p_l[[num + 6]], p_K[[num + 6]], p_r[[num + 6]], p_AUC[[num + 6]], Blank, Blank, Blank, Blank, Blank, Blank, Blank, Blank, ncol = 4, nrow = 3) } if (total_num == 3) { grid.arrange(p_l[[num + 3]], p_K[[num + 3]], p_r[[num + 3]], p_AUC[[num + 3]], p_l[[num + 4]], p_K[[num + 4]], p_r[[num + 4]], p_AUC[[num + 4]], p_l[[num + 5]], p_K[[num + 5]], p_r[[num + 5]], p_AUC[[num + 5]], ncol = 4, nrow = 3) # grid.arrange(p_l[[num + 6]], p_K[[num + 6]], p_r[[num + 6]], # p_l[[num + 7]], p_K[[num + 7]], p_r[[num + 7]], Blank, Blank, Blank, ncol = 3, nrow = 3) } if (total_num == 2) { grid.arrange(p_l[[num + 3]], p_K[[num + 3]], p_r[[num + 3]], p_AUC[[num + 3]], p_l[[num + 4]], p_K[[num + 4]], p_r[[num + 4]], p_AUC[[num + 4]], Blank, Blank, Blank, Blank, ncol = 4, nrow = 3) # grid.arrange(p_l[[num + 6]], p_K[[num + 6]], p_r[[num + 6]], # p_l[[num + 7]], p_K[[num + 7]], p_r[[num + 7]], Blank, Blank, Blank, ncol = 3, nrow = 3) } if (total_num == 1) { grid.arrange(p_l[[num + 3]], p_K[[num + 3]], p_r[[num + 3]], p_AUC[[num + 3]], Blank, Blank, Blank, Blank, Blank, Blank, Blank, Blank, ncol = 4, nrow = 3) # grid.arrange(p_l[[num + 6]], p_K[[num + 6]], p_r[[num + 6]], # p_l[[num + 7]], p_K[[num + 7]], p_r[[num + 7]], Blank, Blank, Blank, ncol = 3, nrow = 3) } } dev.off() pdf(file.path(outDir, "RF_InteractionPlots.pdf"), width = 16, height = 16, onefile = TRUE) X_stats_X2_RF <- ddply( X2_RF, c("Conc_Num", "Conc_Num_Factor"), summarise, mean_L = mean(l, na.rm = TRUE), median_L = median(l, na.rm = TRUE), max_L = max(l, na.rm = TRUE), min_L = min(l, na.rm = TRUE), sd_L = sd(l, na.rm = TRUE), mean_K = mean(K, na.rm = TRUE), median_K = median(K, na.rm = TRUE), max_K = max(K, na.rm = TRUE), min_K = min(K, na.rm = TRUE), sd_K = sd(K, na.rm = TRUE), mean_r = mean(r, na.rm = TRUE), median_r = median(r, na.rm = TRUE), max_r = max(r, na.rm = TRUE), min_r = min(r, na.rm = TRUE), sd_r = sd(r, na.rm = TRUE), mean_AUC = mean(AUC, na.rm = TRUE), median_AUC = median(AUC, na.rm = TRUE), max_AUC = max(AUC, na.rm = TRUE), min_AUC = min(AUC, na.rm = TRUE), sd_AUC = sd(AUC, na.rm = TRUE), NG = sum(NG, na.rm = TRUE), DB = sum(DB, na.rm = TRUE), SM = sum(SM, na.rm = TRUE) ) L_Stats <- ggplot(X2_RF, aes(Conc_Num_Factor, l)) + geom_point(position = "jitter", size = 1) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar", color = "red" ) + stat_summary(fun = mean, geom = "point", color = "red") + scale_x_continuous( name = unique(X$Drug[1]), breaks = unique(X2_RF$Conc_Num_Factor), labels = as.character(unique(X2_RF$Conc_Num)) ) + ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim = c(0, 130)) + annotate("text", x = -0.25, y = 10, label = "NG") + annotate("text", x = -0.25, y = 5, label = "DB") + annotate("text", x = -0.25, y = 0, label = "SM") + annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 10, label = X_stats_X2_RF$NG) + annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 5, label = X_stats_X2_RF$DB) + annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 0, label = X_stats_X2_RF$SM) + theme_publication() K_Stats <- ggplot(X2_RF, aes(Conc_Num_Factor, K)) + geom_point(position = "jitter", size = 1) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar", color = "red" ) + stat_summary(fun = mean, geom = "point", color = "red") + scale_x_continuous( name = unique(X$Drug[1]), breaks = unique(X2_RF$Conc_Num_Factor), labels = as.character(unique(X2_RF$Conc_Num)) ) + ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim = c(-20, 160)) + annotate("text", x = -0.25, y = -5, label = "NG") + annotate("text", x = -0.25, y = -12.5, label = "DB") + annotate("text", x = -0.25, y = -20, label = "SM") + annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = -5, label = X_stats_X2_RF$NG) + annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = -12.5, label = X_stats_X2_RF$DB) + annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = -20, label = X_stats_X2_RF$SM) + theme_publication() R_Stats <- ggplot(X2_RF, aes(Conc_Num_Factor, r)) + geom_point(position = "jitter", size = 1) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar", color = "red" ) + stat_summary(fun = mean, geom = "point", color = "red") + scale_x_continuous( name = unique(X$Drug[1]), breaks = unique(X2_RF$Conc_Num_Factor), labels = as.character(unique(X2_RF$Conc_Num)) ) + ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim = c(0, 1)) + annotate("text", x = -0.25, y = .9, label = "NG") + annotate("text", x = -0.25, y = .8, label = "DB") + annotate("text", x = -0.25, y = .7, label = "SM") + annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = .9, label = X_stats_X2_RF$NG) + annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = .8, label = X_stats_X2_RF$DB) + annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = .7, label = X_stats_X2_RF$SM) + theme_publication() AUC_Stats <- ggplot(X2_RF, aes(Conc_Num_Factor, AUC)) + geom_point(position = "jitter", size = 1) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar", color = "red" ) + stat_summary(fun = mean, geom = "point", color = "red") + scale_x_continuous( name = unique(X$Drug[1]), breaks = unique(X2_RF$Conc_Num_Factor), labels = as.character(unique(X2_RF$Conc_Num)) ) + ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim = c(0, 12500)) + annotate("text", x = -0.25, y = 11000, label = "NG") + annotate("text", x = -0.25, y = 10000, label = "DB") + annotate("text", x = -0.25, y = 9000, label = "SM") + annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 11000, label = X_stats_X2_RF$NG) + annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 10000, label = X_stats_X2_RF$DB) + annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 9000, label = X_stats_X2_RF$SM) + theme_publication() L_Stats_Box <- ggplot(X2_RF, aes(as.factor(Conc_Num_Factor), l)) + geom_boxplot() + scale_x_discrete( name = unique(X$Drug[1]), breaks = unique(X2_RF$Conc_Num_Factor), labels = as.character(unique(X2_RF$Conc_Num)) ) + ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim = c(0, 130)) + theme_publication() K_Stats_Box <- ggplot(X2_RF, aes(as.factor(Conc_Num_Factor), K)) + geom_boxplot() + scale_x_discrete( name = unique(X$Drug[1]), breaks = unique(X2_RF$Conc_Num_Factor), labels = as.character(unique(X2_RF$Conc_Num)) ) + ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim = c(0, 160)) + theme_publication() r_Stats_Box <- ggplot(X2_RF, aes(as.factor(Conc_Num_Factor), r)) + geom_boxplot() + scale_x_discrete( name = unique(X$Drug[1]), breaks = unique(X2_RF$Conc_Num_Factor), labels = as.character(unique(X2_RF$Conc_Num)) ) + ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim = c(0, 1)) + theme_publication() AUC_Stats_Box <- ggplot(X2_RF, aes(as.factor(Conc_Num_Factor), AUC)) + geom_boxplot() + scale_x_discrete( name = unique(X$Drug[1]), breaks = unique(X2_RF$Conc_Num_Factor), labels = as.character(unique(X2_RF$Conc_Num)) ) + ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim = c(12000, 0)) + theme_publication() grid.arrange(L_Stats, K_Stats, R_Stats, AUC_Stats, ncol = 2, nrow = 2) grid.arrange(L_Stats_Box, K_Stats_Box, r_Stats_Box, AUC_Stats_Box, ncol = 2, nrow = 2) # Plot the references # grid.arrange(p3, p3_K, p3_r, p4, p4_K, p4_r, p5, p5_K, p5_r, p6, p6_K, p6_r, ncol = 3, nrow = 4) # grid.arrange(p5, p5_K, p5_r, p6, p6_K, p6_r, ncol = 3, nrow = 2) # Loop for grid arrange 4x3 j <- rep(1, ((num_genes_RF) / 3) - 1) for (n in 1:length(j)) { j[n + 1] <- n * 3 + 1 } # Loop for printing each plot num <- 0 for (m in 1:(round((num_genes_RF) / 3) - 1)) { num <- j[m] grid.arrange(p_rf_l[[num]], p_rf_K[[num]], p_rf_r[[num]], p_rf_AUC[[num]], p_rf_l[[num + 1]], p_rf_K[[num + 1]], p_rf_r[[num + 1]], p_rf_AUC[[num + 1]], p_rf_l[[num + 2]], p_rf_K[[num + 2]], p_rf_r[[num + 2]], p_rf_AUC[[num + 2]], ncol = 4, nrow = 3) # grid.arrange(p_rf_l[[364]], p_rf_K[[364]], p_rf_r[[364]], # p_rf_l[[365]], p_rf_K[[365]], p_rf_r[[365]], p_rf_l[[366]], p_rf_K[[366]], p_rf_r[[366]], ncol = 3, nrow = 3) # p1[[num + 3]], p_rf_K[[num + 3]], p_rf_r[[num + 3]], p1[[num + 4]], p_rf_K[[num + 4]], p_rf_r[[num + 4]] } if (num_genes_RF != (num + 2)) { total_num <- num_genes_RF - (num + 2) if (total_num == 5) { grid.arrange(p_rf_l[[num + 3]], p_rf_K[[num + 3]], p_rf_r[[num + 3]], p_rf_AUC[[num + 3]], p_rf_l[[num + 4]], p_rf_K[[num + 4]], p_rf_r[[num + 4]], p_rf_AUC[[num + 4]], p_rf_l[[num + 5]], p_rf_K[[num + 5]], p_rf_r[[num + 5]], p_rf_AUC[[num + 5]], ncol = 4, nrow = 3 ) grid.arrange(p_rf_l[[num + 6]], p_rf_K[[num + 6]], p_rf_r[[num + 6]], p_rf_AUC[[num + 6]], p_rf_l[[num + 7]], p_rf_K[[num + 7]], p_rf_r[[num + 7]], p_rf_AUC[[num + 7]], Blank, Blank, Blank, Blank, ncol = 4, nrow = 3 ) } if (total_num == 4) { grid.arrange(p_rf_l[[num + 3]], p_rf_K[[num + 3]], p_rf_r[[num + 3]], p_rf_AUC[[num + 3]], p_rf_l[[num + 4]], p_rf_K[[num + 4]], p_rf_r[[num + 4]], p_rf_AUC[[num + 4]], p_rf_l[[num + 5]], p_rf_K[[num + 5]], p_rf_r[[num + 5]], p_rf_AUC[[num + 5]], ncol = 4, nrow = 3 ) grid.arrange(p_rf_l[[num + 6]], p_rf_K[[num + 6]], p_rf_r[[num + 6]], p_rf_AUC[[num + 6]], Blank, Blank, Blank, Blank, Blank, Blank, Blank, Blank, ncol = 4, nrow = 3 ) } if (total_num == 3) { grid.arrange(p_rf_l[[num + 3]], p_rf_K[[num + 3]], p_rf_r[[num + 3]], p_rf_AUC[[num + 3]], p_rf_l[[num + 4]], p_rf_K[[num + 4]], p_rf_r[[num + 4]], p_rf_AUC[[num + 4]], p_rf_l[[num + 5]], p_rf_K[[num + 5]], p_rf_r[[num + 5]], p_rf_AUC[[num + 5]], ncol = 4, nrow = 3 ) # grid.arrange(p_rf_l[[num + 6]], p_rf_K[[num + 6]], p_rf_r[[num + 6]], # p_rf_l[[num + 7]], p_rf_K[[num + 7]], p_rf_r[[num + 7]], Blank, Blank, Blank, ncol = 3, nrow = 3) } if (total_num == 2) { grid.arrange(p_rf_l[[num + 3]], p_rf_K[[num + 3]], p_rf_r[[num + 3]], p_rf_AUC[[num + 3]], p_rf_l[[num + 4]], p_rf_K[[num + 4]], p_rf_r[[num + 4]], p_rf_AUC[[num + 4]], Blank, Blank, Blank, Blank, ncol = 4, nrow = 3 ) # grid.arrange(p_rf_l[[num + 6]], p_rf_K[[num + 6]], p_rf_r[[num + 6]], # p_rf_l[[num + 7]], p_rf_K[[num + 7]], p_rf_r[[num + 7]], Blank, Blank, Blank, ncol = 3, nrow = 3) } if (total_num == 1) { grid.arrange(p_rf_l[[num + 3]], p_rf_K[[num + 3]], p_rf_r[[num + 3]], p_rf_AUC[[num + 3]], Blank, Blank, Blank, Blank, Blank, Blank, Blank, Blank, ncol = 4, nrow = 3 ) # grid.arrange(p_rf_l[[num + 6]], p_rf_K[[num + 6]], p_rf_r[[num + 6]], # p_rf_l[[num + 7]], p_rf_K[[num + 7]], p_rf_r[[num + 7]], Blank, Blank, Blank, ncol = 3, nrow = 3) } } dev.off() # Print rank plots for L and K gene interactions InteractionScores_AdjustMissing <- InteractionScores InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_L), ]$Avg_Zscore_L <- 0.001 InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_K), ]$Avg_Zscore_K <- 0.001 InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_r), ]$Avg_Zscore_r <- 0.001 InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_AUC), ]$Avg_Zscore_AUC <- 0.001 InteractionScores_AdjustMissing$L_Rank <- NA InteractionScores_AdjustMissing$K_Rank <- NA InteractionScores_AdjustMissing$r_Rank <- NA InteractionScores_AdjustMissing$AUC_Rank <- NA InteractionScores_AdjustMissing$L_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_L) InteractionScores_AdjustMissing$K_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_K) InteractionScores_AdjustMissing$r_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_r) InteractionScores_AdjustMissing$AUC_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_AUC) InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_L), ]$Z_lm_L <- 0.001 InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_K), ]$Z_lm_K <- 0.001 InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_r), ]$Z_lm_r <- 0.001 InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_AUC), ]$Z_lm_AUC <- 0.001 InteractionScores_AdjustMissing$L_Rank_lm <- NA InteractionScores_AdjustMissing$K_Rank_lm <- NA InteractionScores_AdjustMissing$r_Rank_lm <- NA InteractionScores_AdjustMissing$AUC_Rank_lm <- NA InteractionScores_AdjustMissing$L_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_L) InteractionScores_AdjustMissing$K_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_K) InteractionScores_AdjustMissing$r_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_r) InteractionScores_AdjustMissing$AUC_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_AUC) # Rank plots Rank_L_1SD <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank, Avg_Zscore_L)) + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 1, ])[1])) + annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -1, ])[1])) + theme_publication() Rank_L_2SD <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank, Avg_Zscore_L)) + ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 2, ])[1])) + annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -2, ])[1])) + theme_publication() Rank_L_3SD <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank, Avg_Zscore_L)) + ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 3, ])[1])) + annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -3, ])[1])) + theme_publication() Rank_K_1SD <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank, Avg_Zscore_K)) + ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -1, ])[1])) + annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 1, ])[1])) + theme_publication() Rank_K_2SD <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank, Avg_Zscore_K)) + ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -2, ])[1])) + annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 2, ])[1])) + theme_publication() Rank_K_3SD <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank, Avg_Zscore_K)) + ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -3, ])[1])) + annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 3, ])[1])) + theme_publication() Rank_L_1SD_notext <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank, Avg_Zscore_L)) + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + theme_publication() Rank_L_2SD_notext <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank, Avg_Zscore_L)) + ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + theme_publication() Rank_L_3SD_notext <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank, Avg_Zscore_L)) + ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + theme_publication() Rank_K_1SD_notext <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank, Avg_Zscore_K)) + ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + theme_publication() Rank_K_2SD_notext <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank, Avg_Zscore_K)) + ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + theme_publication() Rank_K_3SD_notext <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank, Avg_Zscore_K)) + ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + theme_publication() pdf(file.path(outDir, "RankPlots.pdf"), width = 18, height = 12, onefile = TRUE) grid.arrange( Rank_L_1SD, Rank_L_2SD, Rank_L_3SD, Rank_K_1SD, Rank_K_2SD, Rank_K_3SD, ncol = 3, nrow = 2 ) grid.arrange( Rank_L_1SD_notext, Rank_L_2SD_notext, Rank_L_3SD_notext, Rank_K_1SD_notext, Rank_K_2SD_notext, Rank_K_3SD_notext, ncol = 3, nrow = 2 ) dev.off() Rank_L_1SD_lm <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank_lm, Z_lm_L)) + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 1, ])[1])) + annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -1, ])[1])) + theme_publication() Rank_L_2SD_lm <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank_lm, Z_lm_L)) + ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 2, ])[1])) + annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -2, ])[1])) + theme_publication() Rank_L_3SD_lm <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank_lm, Z_lm_L)) + ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 3, ])[1])) + annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -3, ])[1])) + theme_publication() Rank_K_1SD_lm <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank_lm, Z_lm_K)) + ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -1, ])[1])) + annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 1, ])[1])) + theme_publication() Rank_K_2SD_lm <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank_lm, Z_lm_K)) + ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -2, ])[1])) + annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 2, ])[1])) + theme_publication() Rank_K_3SD_lm <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank_lm, Z_lm_K)) + ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -3, ])[1])) + annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 3, ])[1])) + theme_publication() Rank_L_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank_lm, Z_lm_L)) + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + theme_publication() Rank_L_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank_lm, Z_lm_L)) + ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + theme_publication() Rank_L_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank_lm, Z_lm_L)) + ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + theme_publication() Rank_K_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank_lm, Z_lm_K)) + ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + theme_publication() Rank_K_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank_lm, Z_lm_K)) + ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + theme_publication() Rank_K_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank_lm, Z_lm_K)) + ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + theme_publication() pdf(file.path(outDir, "RankPlots_lm.pdf"), width = 18, height = 12, onefile = TRUE) grid.arrange( Rank_L_1SD_lm, Rank_L_2SD_lm, Rank_L_3SD_lm, Rank_K_1SD_lm, Rank_K_2SD_lm, Rank_K_3SD_lm, ncol = 3, nrow = 2 ) grid.arrange( Rank_L_1SD_notext_lm, Rank_L_2SD_notext_lm, Rank_L_3SD_notext_lm, Rank_K_1SD_notext_lm, Rank_K_2SD_notext_lm, Rank_K_3SD_notext_lm, ncol = 3, nrow = 2 ) dev.off() X_NArm <- InteractionScores[!is.na(InteractionScores$Z_lm_L) | !is.na(InteractionScores$Avg_Zscore_L), ] # Find overlaps X_NArm$Overlap <- "No Effect" try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L >= 2, ]$Overlap <- "Deletion Enhancer Both") try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L <= -2, ]$Overlap <- "Deletion Suppressor Both") try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= 2, ]$Overlap <- "Deletion Enhancer lm only") try(X_NArm[X_NArm$Z_lm_L <= 2 & X_NArm$Avg_Zscore_L >= 2, ]$Overlap <- "Deletion Enhancer Avg Zscore only") try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= -2, ]$Overlap <- "Deletion Suppressor lm only") try(X_NArm[X_NArm$Z_lm_L >= -2 & X_NArm$Avg_Zscore_L <= -2, ]$Overlap <- "Deletion Suppressor Avg Zscore only") try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= -2, ]$Overlap <- "Deletion Enhancer lm, Deletion Suppressor Avg Z score") try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= 2, ]$Overlap <- "Deletion Suppressor lm, Deletion Enhancer Avg Z score") # Get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 get_lm_L <- lm(X_NArm$Z_lm_L ~ X_NArm$Avg_Zscore_L) L_lm <- summary(get_lm_L) get_lm_K <- lm(X_NArm$Z_lm_K ~ X_NArm$Avg_Zscore_K) K_lm <- summary(get_lm_K) get_lm_r <- lm(X_NArm$Z_lm_r ~ X_NArm$Avg_Zscore_r) r_lm <- summary(get_lm_r) get_lm_AUC <- lm(X_NArm$Z_lm_AUC ~ X_NArm$Avg_Zscore_AUC) AUC_lm <- summary(get_lm_AUC) pdf(file.path(outDir, "Avg_Zscore_vs_lm_NA_rm.pdf"), width = 16, height = 12, onefile = TRUE) print(ggplot(X_NArm, aes(Avg_Zscore_L, Z_lm_L)) + geom_point(aes(color = Overlap), shape = 3) + geom_smooth(method = "lm", color = 1) + ggtitle("Avg Zscore vs lm L") + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm$r.squared, 2))) + theme_publication_legend_right()) print(ggplot(X_NArm, aes(Avg_Zscore_K, Z_lm_K)) + geom_point(aes(color = Overlap), shape = 3) + geom_smooth(method = "lm", color = 1) + ggtitle("Avg Zscore vs lm K") + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(K_lm$r.squared, 2))) + theme_publication_legend_right()) print(ggplot(X_NArm, aes(Avg_Zscore_r, Z_lm_r)) + geom_point(aes(color = Overlap), shape = 3) + geom_smooth(method = "lm", color = 1) + ggtitle("Avg Zscore vs lm r") + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(r_lm$r.squared, 2))) + theme_publication_legend_right()) print(ggplot(X_NArm, aes(Avg_Zscore_AUC, Z_lm_AUC)) + geom_point(aes(color = Overlap), shape = 3) + geom_smooth(method = "lm", color = 1) + ggtitle("Avg Zscore vs lm AUC") + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(AUC_lm$r.squared, 2))) + theme_publication_legend_right()) dev.off() lm_v_Zscore_L <- ggplot(X_NArm, aes(Avg_Zscore_L, Z_lm_L, ORF = OrfRep, Gene = Gene, NG = NG, SM = SM, DB = DB)) + geom_point(aes(color = Overlap), shape = 3) + geom_smooth(method = "lm", color = 1) + ggtitle("Avg Zscore vs lm L") + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm$r.squared, 2))) + theme_publication_legend_right() pgg <- ggplotly(lm_v_Zscore_L) plotly_path <- file.path(outDir, "Avg_Zscore_vs_lm_NA_rm.html") saveWidget(pgg, file = plotly_path, selfcontained = TRUE) X_NArm$L_Rank <- rank(X_NArm$Avg_Zscore_L) X_NArm$K_Rank <- rank(X_NArm$Avg_Zscore_K) X_NArm$r_Rank <- rank(X_NArm$Avg_Zscore_r) X_NArm$AUC_Rank <- rank(X_NArm$Avg_Zscore_AUC) X_NArm$L_Rank_lm <- rank(X_NArm$Z_lm_L) X_NArm$K_Rank_lm <- rank(X_NArm$Z_lm_K) X_NArm$r_Rank_lm <- rank(X_NArm$Z_lm_r) X_NArm$AUC_Rank_lm <- rank(X_NArm$Z_lm_AUC) # Get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 get_lm_L2 <- lm(X_NArm$L_Rank_lm ~ X_NArm$L_Rank) L_lm2 <- summary(get_lm_L2) get_lm_K2 <- lm(X_NArm$K_Rank_lm ~ X_NArm$K_Rank) K_lm2 <- summary(get_lm_K2) get_lm_r2 <- lm(X_NArm$r_Rank_lm ~ X_NArm$r_Rank) r_lm2 <- summary(get_lm_r2) get_lm_AUC2 <- lm(X_NArm$AUC_Rank_lm ~ X_NArm$AUC_Rank) AUC_lm2 <- summary(get_lm_AUC2) num_genes_NArm2 <- (dim(X_NArm)[1]) / 2 pdf(file.path(outDir, "Avg_Zscore_vs_lm_ranked_NA_rm.pdf"), width = 16, height = 12, onefile = TRUE) print( ggplot(X_NArm, aes(L_Rank, L_Rank_lm)) + geom_point(aes(color = Overlap), shape = 3) + geom_smooth(method = "lm", color = 1) + ggtitle("Rank Avg Zscore vs lm L") + annotate("text", x = num_genes_NArm2, y = num_genes_NArm2, label = paste("R-squared = ", round(L_lm2$r.squared, 2))) + theme_publication_legend_right() ) print( ggplot(X_NArm, aes(K_Rank, K_Rank_lm)) + geom_point(aes(color = Overlap), shape = 3) + geom_smooth(method = "lm", color = 1) + ggtitle("Rank Avg Zscore vs lm K") + annotate("text", x = num_genes_NArm2, y = num_genes_NArm2, label = paste("R-squared = ", round(K_lm2$r.squared, 2))) + theme_publication_legend_right() ) print( ggplot(X_NArm, aes(r_Rank, r_Rank_lm)) + geom_point(aes(color = Overlap), shape = 3) + geom_smooth(method = "lm", color = 1) + ggtitle("Rank Avg Zscore vs lm r") + annotate("text", x = num_genes_NArm2, y = num_genes_NArm2, label = paste("R-squared = ", round(r_lm2$r.squared, 2))) + theme_publication_legend_right() ) print( ggplot(X_NArm, aes(AUC_Rank, AUC_Rank_lm)) + geom_point(aes(color = Overlap), shape = 3) + geom_smooth(method = "lm", color = 1) + ggtitle("Rank of Avg Zscore vs lm AUC") + annotate("text", x = num_genes_NArm2, y = num_genes_NArm2, label = paste("R-squared = ", round(AUC_lm2$r.squared, 2))) + theme_publication_legend_right() ) dev.off() Rank_L_1SD <- ggplot(X_NArm, aes(L_Rank, Avg_Zscore_L)) + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Avg_Zscore_L >= 1, ])[1])) + annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Avg_Zscore_L <= -1, ])[1])) + theme_publication() Rank_L_2SD <- ggplot(X_NArm, aes(L_Rank, Avg_Zscore_L)) + ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Avg_Zscore_L >= 2, ])[1])) + annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Avg_Zscore_L <= -2, ])[1])) + theme_publication() Rank_L_3SD <- ggplot(X_NArm, aes(L_Rank, Avg_Zscore_L)) + ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Avg_Zscore_L >= 3, ])[1])) + annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Avg_Zscore_L <= -3, ])[1])) + theme_publication() Rank_K_1SD <- ggplot(X_NArm, aes(K_Rank, Avg_Zscore_K)) + ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Avg_Zscore_K <= -1, ])[1])) + annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Avg_Zscore_K >= 1, ])[1])) + theme_publication() Rank_K_2SD <- ggplot(X_NArm, aes(K_Rank, Avg_Zscore_K)) + ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Avg_Zscore_K <= -2, ])[1])) + annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Avg_Zscore_K >= 2, ])[1])) + theme_publication() Rank_K_3SD <- ggplot(X_NArm, aes(K_Rank, Avg_Zscore_K)) + ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Avg_Zscore_K <= -3, ])[1])) + annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Avg_Zscore_K >= 3, ])[1])) + theme_publication() Rank_L_1SD_notext <- ggplot(X_NArm, aes(L_Rank, Avg_Zscore_L)) + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + theme_publication() Rank_L_2SD_notext <- ggplot(X_NArm, aes(L_Rank, Avg_Zscore_L)) + ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + theme_publication() Rank_L_3SD_notext <- ggplot(X_NArm, aes(L_Rank, Avg_Zscore_L)) + ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + theme_publication() Rank_K_1SD_notext <- ggplot(X_NArm, aes(K_Rank, Avg_Zscore_K)) + ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + theme_publication() Rank_K_2SD_notext <- ggplot(X_NArm, aes(K_Rank, Avg_Zscore_K)) + ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + theme_publication() Rank_K_3SD_notext <- ggplot(X_NArm, aes(K_Rank, Avg_Zscore_K)) + ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + theme_publication() pdf(file.path(outDir, "RankPlots_naRM.pdf"), width = 18, height = 12, onefile = TRUE) grid.arrange( Rank_L_1SD, Rank_L_2SD, Rank_L_3SD, Rank_K_1SD, Rank_K_2SD, Rank_K_3SD, ncol = 3, nrow = 2 ) grid.arrange( Rank_L_1SD_notext, Rank_L_2SD_notext, Rank_L_3SD_notext, Rank_K_1SD_notext, Rank_K_2SD_notext, Rank_K_3SD_notext, ncol = 3, nrow = 2 ) dev.off() Rank_L_1SD_lm <- ggplot(X_NArm, aes(L_Rank_lm, Z_lm_L)) + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Z_lm_L >= 1, ])[1])) + annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Z_lm_L <= -1, ])[1])) + theme_publication() Rank_L_2SD_lm <- ggplot(X_NArm, aes(L_Rank_lm, Z_lm_L)) + ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Z_lm_L >= 2, ])[1])) + annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Z_lm_L <= -2, ])[1])) + theme_publication() Rank_L_3SD_lm <- ggplot(X_NArm, aes(L_Rank_lm, Z_lm_L)) + ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Z_lm_L >= 3, ])[1])) + annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Z_lm_L <= -3, ])[1])) + theme_publication() Rank_K_1SD_lm <- ggplot(X_NArm, aes(K_Rank_lm, Z_lm_K)) + ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Z_lm_K <= -1, ])[1])) + annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Z_lm_K >= 1, ])[1])) + theme_publication() Rank_K_2SD_lm <- ggplot(X_NArm, aes(K_Rank_lm, Z_lm_K)) + ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Z_lm_K <= -2, ])[1])) + annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Z_lm_K >= 2, ])[1])) + theme_publication() Rank_K_3SD_lm <- ggplot(X_NArm, aes(K_Rank_lm, Z_lm_K)) + ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Z_lm_K <= -3, ])[1])) + annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Z_lm_K >= 3, ])[1])) + theme_publication() Rank_L_1SD_notext_lm <- ggplot(X_NArm, aes(L_Rank_lm, Z_lm_L)) + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + theme_publication() Rank_L_2SD_notext_lm <- ggplot(X_NArm, aes(L_Rank_lm, Z_lm_L)) + ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + theme_publication() Rank_L_3SD_notext_lm <- ggplot(X_NArm, aes(L_Rank_lm, Z_lm_L)) + ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + theme_publication() Rank_K_1SD_notext_lm <- ggplot(X_NArm, aes(K_Rank_lm, Z_lm_K)) + ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + theme_publication() Rank_K_2SD_notext_lm <- ggplot(X_NArm, aes(K_Rank_lm, Z_lm_K)) + ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + theme_publication() Rank_K_3SD_notext_lm <- ggplot(X_NArm, aes(K_Rank_lm, Z_lm_K)) + ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + theme_publication() pdf(file.path(outDir, "RankPlots_lm_naRM.pdf"), width = 18, height = 12, onefile = TRUE) grid.arrange( Rank_L_1SD_lm, Rank_L_2SD_lm, Rank_L_3SD_lm, Rank_K_1SD_lm, Rank_K_2SD_lm, Rank_K_3SD_lm, ncol = 3, nrow = 2 ) grid.arrange( Rank_L_1SD_notext_lm, Rank_L_2SD_notext_lm, Rank_L_3SD_notext_lm, Rank_K_1SD_notext_lm, Rank_K_2SD_notext_lm, Rank_K_3SD_notext_lm, ncol = 3, nrow = 2 ) dev.off() } # Get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 get_lm_1 <- lm(X_NArm$Z_lm_K ~ X_NArm$Z_lm_L) L_lm_1 <- summary(get_lm_1) get_lm_2 <- lm(X_NArm$Z_lm_r ~ X_NArm$Z_lm_L) L_lm_2 <- summary(get_lm_2) get_lm_3 <- lm(X_NArm$Z_lm_AUC ~ X_NArm$Z_lm_L) L_lm_3 <- summary(get_lm_3) get_lm_4 <- lm(X_NArm$Z_lm_r ~ X_NArm$Z_lm_K) L_lm_4 <- summary(get_lm_4) get_lm_5 <- lm(X_NArm$Z_lm_AUC ~ X_NArm$Z_lm_K) L_lm_5 <- summary(get_lm_5) get_lm_6 <- lm(X_NArm$Z_lm_AUC ~ X_NArm$Z_lm_r) L_lm_6 <- summary(get_lm_6) pdf(file.path(outDir, "Correlation_CPPs.pdf"), width = 10, height = 7, onefile = TRUE) ggplot(X_NArm, aes(Z_lm_L, Z_lm_K)) + geom_point(shape = 3, color = "gray70") + geom_smooth(method = "lm", color = "tomato3") + ggtitle("Interaction L vs. Interaction K") + xlab("z-score L") + ylab("z-score K") + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_1$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18) ) ggplot(X_NArm, aes(Z_lm_L, Z_lm_r)) + geom_point(shape = 3, color = "gray70") + geom_smooth(method = "lm", color = "tomato3") + ggtitle("Interaction L vs. Interaction r") + xlab("z-score L") + ylab("z-score r") + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_2$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18) ) ggplot(X_NArm, aes(Z_lm_L, Z_lm_AUC)) + geom_point(shape = 3, color = "gray70") + geom_smooth(method = "lm", color = "tomato3") + ggtitle("Interaction L vs. Interaction AUC") + xlab("z-score L") + ylab("z-score AUC") + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_3$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18) ) ggplot(X_NArm, aes(Z_lm_K, Z_lm_r)) + geom_point(shape = 3, color = "gray70") + geom_smooth(method = "lm", color = "tomato3") + ggtitle("Interaction K vs. Interaction r") + xlab("z-score K") + ylab("z-score r") + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_4$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18) ) ggplot(X_NArm, aes(Z_lm_K, Z_lm_AUC)) + geom_point(shape = 3, color = "gray70") + geom_smooth(method = "lm", color = "tomato3") + ggtitle("Interaction K vs. Interaction AUC") + xlab("z-score K") + ylab("z-score AUC") + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_5$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18) ) ggplot(X_NArm, aes(Z_lm_r, Z_lm_AUC)) + geom_point(shape = 3, color = "gray70") + geom_smooth(method = "lm", color = "tomato3") + ggtitle("Interaction r vs. Interaction AUC") + xlab("z-score r") + ylab("z-score AUC") + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_6$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)) InteractionScores_RF2 <- InteractionScores_RF[!is.na(InteractionScores_RF$Z_lm_L), ] ggplot(X_NArm, aes(Z_lm_L, Z_lm_K)) + geom_point(shape = 3, color = "gray70") + geom_point(data = InteractionScores_RF2, aes(Z_lm_L, Z_lm_K), color = "cyan") + ggtitle("Interaction L vs. Interaction K") + xlab("z-score L") + ylab("z-score K") + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_1$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18) ) ggplot(X_NArm, aes(Z_lm_L, Z_lm_r)) + geom_point(shape = 3, color = "gray70") + geom_point(data = InteractionScores_RF2, aes(Z_lm_L, Z_lm_r), color = "cyan") + ggtitle("Interaction L vs. Interaction r") + xlab("z-score L") + ylab("z-score r") + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_2$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18) ) ggplot(X_NArm, aes(Z_lm_L, Z_lm_AUC)) + geom_point(shape = 3, color = "gray70") + geom_point(data = InteractionScores_RF2, aes(Z_lm_L, Z_lm_AUC), color = "cyan") + ggtitle("Interaction L vs. Interaction AUC") + xlab("z-score L") + ylab("z-score AUC") + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_3$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18) ) ggplot(X_NArm, aes(Z_lm_K, Z_lm_r)) + geom_point(shape = 3, color = "gray70") + geom_point(data = InteractionScores_RF2, aes(Z_lm_K, Z_lm_r), color = "cyan") + ggtitle("Interaction K vs. Interaction r") + xlab("z-score K") + ylab("z-score r") + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_4$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18) ) ggplot(X_NArm, aes(Z_lm_K, Z_lm_AUC)) + geom_point(shape = 3, color = "gray70") + geom_point(data = InteractionScores_RF2, aes(Z_lm_K, Z_lm_AUC), color = "cyan") + ggtitle("Interaction K vs. Interaction AUC") + xlab("z-score K") + ylab("z-score AUC") + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_5$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18) ) ggplot(X_NArm, aes(Z_lm_r, Z_lm_AUC)) + geom_point(shape = 3, color = "gray70") + geom_point(data = InteractionScores_RF2, aes(Z_lm_r, Z_lm_AUC), color = "cyan") + ggtitle("Interaction r vs. Interaction AUC") + xlab("z-score r") + ylab("z-score AUC") + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_6$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18) ) dev.off() # write.csv(Labels, file.path("../Code/Parameters.csv"), row.names = FALSE) timestamp()