From 6992d5eec035fdaf49bbea18bd12513d6a9e80c0 Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Wed, 14 Aug 2024 23:20:29 -0400 Subject: [PATCH] Rollup before parallelization --- workflow/.lintr | 2 +- workflow/README.md | 126 +- workflow/apps/r/TSHeatmaps5dev2.R | 169 +- workflow/apps/r/createHeatMapsHomology.R | 63 +- workflow/apps/r/gtaTemplate.R | 2 +- workflow/apps/r/interactions.R | 3489 +++++++++++----------- workflow/apps/r/joinInteractExps.R | 75 +- workflow/qhtcp-workflow | 1025 +++---- 8 files changed, 2517 insertions(+), 2434 deletions(-) diff --git a/workflow/.lintr b/workflow/.lintr index 70de9dfb..c2e4f11f 100644 --- a/workflow/.lintr +++ b/workflow/.lintr @@ -1,5 +1,5 @@ linters: linters_with_defaults( - object_name_linter = NULL, + # object_name_linter = NULL, object_usage_linter = NULL, commented_code_linter = NULL, trailing_whitespace_linter(allow_empty_lines = TRUE), diff --git a/workflow/README.md b/workflow/README.md index 87b4bf1c..cfa26783 100644 --- a/workflow/README.md +++ b/workflow/README.md @@ -33,7 +33,7 @@ Insert a general description of Q-HTCP and the Q-HTCP process here. * [pl_gtf_terms2tsv](#plgtfterms2tsv) * [py_gtf_concat](#pygtfconcat) * [r_compile_gtf](#rcompilegtf) -* [get_studies](#getstudies) +* [study_info](#studyinfo) * [choose_easy_results](#chooseeasyresults) ## Notes @@ -183,7 +183,7 @@ If you wish to install them manually, you can use the following information to d #### Perl -* `cpan File::Map ExtUtils::PkgConfig GD GO::TermFinder` +* `cpan -I -i File::Map ExtUtils::PkgConfig GD GO::TermFinder` #### R @@ -199,7 +199,7 @@ This module: * Initializes a project directory in the scans directory -TODO +:bulb: **TODO** * Copy over source image directories from robot * MasterPlate_ file **should not be an xlsx file**, no portability @@ -207,7 +207,7 @@ TODO * But moving forward should switch to csv or something open * Do we need to sync a QHTCP template? 
-NOTES +:memo: **NOTES** * Copy over the images from the robot and then DO NOT TOUCH that directory except to copy from it * Write-protect (read-only) if we need to @@ -522,12 +522,11 @@ TODO WIP System for Multi-QHTCP-Experiment Gene Interaction Profiling Analysis * Functional rewrite of REMcMaster3.sh, RemcMaster2.sh, REMcJar2.sh, ExpFrontend.m, mProcess.sh, mFunction.sh, mComponent.sh -* Added a newline character to the end of StudyInfo.csv so it is a valid text file +* Added a newline character to the end of the study info file so it is a valid text file TODO * Suggest renaming StudiesQHTCP to something like qhtcp qhtcp_output or output -* Store StudyInfo somewhere better * Move (hide) the study template somewhere else * StudiesArchive should be smarter: * Create a database with as much information as possible @@ -592,7 +591,7 @@ TODO #### Arguments -* **$1** (string): studyInfo file +* **$1** (string): study info file ### gtf @@ -640,14 +639,14 @@ TODO * Is GTAtemplate.R actually a template? * Do we need to allow user customization? 
-Files +INPUT * [gene_association.sgd](https://downloads.yeastgenome.org/curation/chromosomal_feature/gene_association.sgd) * go_terms.tab -Output +OUTPUT -* +* Average_GOTerms_All.csv #### Arguments @@ -663,11 +662,13 @@ PairwiseLK.R R script TODO -* Should move directory creation from PairwiseLK.R to gta module +* Move directory creation from PairwiseLK.R to gta module +* Needs better output filenames and directory organization +* Needs more for looping to reduce verbosity -Files +INPUT -* +* Average_GOTerms_All.csv * Output @@ -684,7 +685,7 @@ This wrapper: * **$1** (string): First Exp# name * **$2** (string): Second Exp# name -* **$3** (string): StudyInfo.csv file +* **$3** (string): study info file * **$4** (string): output directory ### r_gta_heatmaps @@ -693,9 +694,10 @@ TSHeatmaps5dev2.R R script TODO -* Script could use rename -* Script should be refactored to automatically allow more studies -* Script should be refactored with more looping to reduce verbosity +* Rename +* Refactor to automatically allow more studies +* Refactor with more looping to reduce verbosity +* Reduce cyclomatic complexity of some of the for loops Files @@ -709,13 +711,13 @@ Output This wrapper: * The Term Specific Heatmaps are produced directly from the ../ExpStudy/Exp_/ZScores/ZScores_Interaction.csv file generated by the user modified interaction… .R script. -* The heatmap labeling is per the names the user wrote into the StudyInfo.txt spreadsheet. +* The heatmap labeling is per the names the user wrote into the study info file * Verify that the All_SGD_GOTerms_for_QHTCPtk.csv found in ../Code is what you wish to use or if you wish to use a custom modified version. * If you wish to use a custom modified version, create it and modify the TSHeatmaps template script (TSHeatmaps5dev2.R) and save it as a ‘TSH_study specific name’. 
#### Arguments -* **$1** (string): StudyInfo.csv file +* **$1** (string): study info file * **$2** (string): gene_ontology_edit.obo file * **$3** (string): go_terms.tab file * **$4** (string): All_SGD_GOTerms_for_QHTCPtk.csv @@ -737,6 +739,14 @@ TODO * Re-enable disabled linter checks * Reduce cyclomatic complexity of some of the for loops * There needs to be one point of truth for the SD factor +* Replace most paste() functions with printf() + +INPUT + +* easy/results_std.txt + + + NOTES @@ -744,18 +754,26 @@ NOTES #### Arguments -* **$1** (string): The input directory +* **$1** (string): The input results_std.txt * **$2** (string): The zscores directory * **$3** (string): The study info file * **$4** (string): SGD_features.tab -* **$5** (integer): delta SD background value (default: 5) -* **$6** (integer): experiment number +* **$5** (integer): experiment number +* **$6** (integer): delta SD background value (default: 3) ### r_join_interactions JoinInteractExps3dev.R creates REMcRdy_lm_only.csv and Shift_only.csv -Output +TODO + +* Needs more loops to reduce verbosity + +INPUT + +* + +OUTPUT * REMcRdy_lm_only.csv * Shift_only.csv @@ -765,7 +783,7 @@ Output * **$1** (string): The output directory * **$2** (string): The sd value -* **$3** (string): The studyInfo file +* **$3** (string): The study info file ### java_extract @@ -785,10 +803,10 @@ NOTE #### Arguments -* **$1** (string): GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab +* **$1** (string): The output directory * **$2** (string): ORF_List_Without_DAmPs.txt * **$3** (string): REMcRdy_lm_only.csv -* **$4** (string): The output directory +* **$4** (string): GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab * **$5** (string): The output file #### Exit codes @@ -805,13 +823,25 @@ and output "REMcWithShift.csv" for use with the REMc heat maps * **$1** (string): REMcRdy_lm_only.csv-finalTable.csv * **$2** (string): Shift_only.csv -* **$3** (string): StudyInfo.csv file -* **$4** (string): The sd value +* **$3** 
(string): study info file +* **$4** (string): sd value ### r_create_heat_maps Execute createHeatMaps.R +INPUT + +* REMcWithShift.csv + +OUTPUT + +* compiledREMcHeatmaps.pdf + +TODO + +* Needs more looping for brevity + #### Arguments * **$1** (string): The final shift table (REMcWithShift.csv) @@ -832,7 +862,9 @@ Execute createHeatMapsAll.R Perform python dcon portion of GTF -Output +SCRIPT: [DconJG2.py](apps/python/DconJG2.py) + +OUTPUT * 1-0-0-finaltable.csv @@ -844,9 +876,13 @@ Output ### pl_gtf_analyze Perl analyze wrapper -This seems weird to me because we're just overwriting the same data for all set2 members -https://metacpan.org/dist/GO-TermFinder/view/examples/analyze.pl -Is there a reason you need a custom version and not the original from cpan? + +SCRIPT: [analyze_v2.pl](https://metacpan.org/dist/GO-TermFinder/view/examples/analyze.pl) + +TODO + +* Are we just overwriting the same data for all set2 members? +* Why the custom version? #### Arguments @@ -858,7 +894,10 @@ Is there a reason you need a custom version and not the original from cpan? ### pl_gtf_terms2tsv Perl terms2tsv wrapper -Probably should be translated to shell/python + +TODO + +* Probably should be translated to shell/python #### Arguments @@ -868,7 +907,10 @@ Probably should be translated to shell/python Python concat wrapper for GTF Concat the process ontology outputs from the /REMcReady_lm_only folder -Probably should be translated to bash + +TODO + +* Probably should be translated to bash #### Arguments @@ -883,24 +925,18 @@ Compile GTF in R * **$1** (string): gtf output directory -### get_studies +### study_info -Parse study names from StudyInfo.csv files +Creates, modifies, and parses the study info file TODO -* This whole wrapper should eventually be either -* Removed -* Expanded into a file that stores all project/study settings (database) -* I had to had a new line to the end of StudyInfo.csv, may break things? 
- -#### Arguments - -* **$1** (string): Study info file +* Needs refactoring +* Ended up combining a few functions into one #### Variables set -* **STUDIES_NUMS** (array): Contains Exp numbers +* **STUDIES_NUMS** (array): contains Exp numbers #### Exit codes diff --git a/workflow/apps/r/TSHeatmaps5dev2.R b/workflow/apps/r/TSHeatmaps5dev2.R index fa5db68f..2bf6349a 100644 --- a/workflow/apps/r/TSHeatmaps5dev2.R +++ b/workflow/apps/r/TSHeatmaps5dev2.R @@ -8,17 +8,16 @@ # @arg $2 string gene_ontology_edit.obo file # @arg $3 string go_terms.tab file # @arg $4 string All_SGD_GOTerms_for_QHTCPtk.csv -# @arg $5 string ZScores_interaction.csv -# @arg $6 string base directory -# @arg $7 string output directory +# @arg $5 string base directory +# @arg $6 string output directory library("ontologyIndex") library("ggplot2") library("RColorBrewer") library("grid") library("ggthemes") -#library("plotly") -#library("htmlwidgets") +# library("plotly") +# library("htmlwidgets") library("extrafont") library("stringr") library("org.Sc.sgd.db") @@ -31,10 +30,9 @@ study_info_file <- args[1] ontology_file <- args[2] sgd_terms_tfile <- args[3] all_sgd_terms_csv <- args[4] -zscores_file <- args[5] -base_dir <- args[6] -output_dir <- args[7] -study_nums <- args[8:length(args)] +base_dir <- args[5] +output_dir <- args[6] +study_nums <- args[7:length(args)] # Import standard tables used in Sean's code That should be copied to each ExpStudy labels <- read.csv(file = study_info_file, stringsAsFactors = FALSE) @@ -52,7 +50,7 @@ XX3[, 2] <- gsub(pattern = "/", replacement = "_", x = XX3[, 2]) # Load input files for (study_num in study_nums) { - input_file <- file.path(base_dir, paste("Exp", study_num), zscores_file) + input_file <- file.path(base_dir, paste("Exp", study_num), zscores, "zscores_interaction.csv") if (file.exists(input_file)) { assign(paste(X, study_num), read.csv(file = input_file, stringsAsFactors = FALSE, header = TRUE)) assign(paste(Name, study_num), labels[study_num, 2]) @@ 
-206,10 +204,10 @@ if (length(study_nums) > 1) { try(X[X$Gene_X2 == "", ]$Gene_X2 <- X[X$Gene_X2 == "", ]$OrfRep_X2) X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" | - colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" | - colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" | - colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" | - colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2"] + colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" | + colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" | + colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" | + colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2"] X_heatmap <- X_heatmap[, c(10, 1, 4, 5, 8, 9, 2, 3, 6, 7)] colnames(X_heatmap) <- gsub(pattern = "X1", replacement = Name1, colnames(X_heatmap)) @@ -226,12 +224,12 @@ if (length(study_nums) > 2) { X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" | - colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" | - colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" | - colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" | - colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" | - colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" | - colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3"] + colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" | + colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" | + colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" | + colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" | + colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" | + colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3"] # Reorder columns X_heatmap <- X_heatmap[, c(14, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11)] @@ -252,14 +250,14 @@ if (length(study_nums) > 3) { X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" | - colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" | - colnames(X) == 
"Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" | - colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" | - colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" | - colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" | - colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" | - colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" | - colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4"] + colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" | + colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" | + colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" | + colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" | + colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" | + colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" | + colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" | + colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4"] # Reorder columns X_heatmap <- X_heatmap[, c(18, 1, 4, 5, 8, 9, 12, 13, 16, 17, 2, 3, 6, 7, 10, 11, 14, 15)] @@ -283,16 +281,16 @@ if (length(study_nums) > 4) { X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" | - colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" | - colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" | - colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" | - colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" | - colnames(X) == "Z_Shift_K_X5" | colnames(X) == "Z_lm_K_X5" | - colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" | - colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" | - colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" | - colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4" | - colnames(X) == "Z_Shift_L_X5" | colnames(X) == "Z_lm_L_X5"] + colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" | + colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" | + colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" | + colnames(X) == 
"Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" | + colnames(X) == "Z_Shift_K_X5" | colnames(X) == "Z_lm_K_X5" | + colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" | + colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" | + colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" | + colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4" | + colnames(X) == "Z_Shift_L_X5" | colnames(X) == "Z_lm_L_X5"] # Reorder columns X_heatmap <- X_heatmap[, c(22, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 2, 3, 6, 7, 10, 11, 14, 15, 18, 19)] @@ -441,7 +439,14 @@ for (s in 1:dim(XX3)[1]) { } if (Parent_Size > 2000) { - pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 45, onefile = TRUE) + + pdf( + file = file.path(output_dir, paste(XX3[s, 2], ".pdf", sep = "")), + width = 12, + height = 45, + onefile = TRUE + ) + for (i in 1:length(GOTerm_parent)) { GO_Term <- GOTerm_parent[i] GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2]) @@ -461,7 +466,7 @@ for (s in 1:dim(XX3)[1]) { keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8), na.color = "red", col = brewer.pal(11, "PuOr"), main = GO_Term_Name, - #ColSideColors = ev_repeat, + # ColSideColors = ev_repeat, labRow = as.character(Genes_Annotated_to_Term$Gene) )) } @@ -470,7 +475,14 @@ for (s in 1:dim(XX3)[1]) { } if (Parent_Size >= 1000 && Parent_Size <= 2000) { - pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 35, onefile = TRUE) + + pdf( + file = file.path(output_dir, paste(XX3[s, 2], ".pdf", sep = "")), + width = 12, + height = 35, + onefile = TRUE + ) + for (i in 1:length(GOTerm_parent)) { GO_Term <- GOTerm_parent[i] GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2]) @@ -490,7 +502,7 @@ for (s in 1:dim(XX3)[1]) { keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8), na.color = "red", col = brewer.pal(11, "PuOr"), main = GO_Term_Name, - #ColSideColors 
= ev_repeat, + # ColSideColors = ev_repeat, labRow = as.character(Genes_Annotated_to_Term$Gene) )) } @@ -499,7 +511,14 @@ for (s in 1:dim(XX3)[1]) { } if (Parent_Size >= 500 && Parent_Size <= 1000) { - pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 30, onefile = TRUE) + + pdf( + file = file.path(output_dir, paste(XX3[s, 2], ".pdf", sep = "")), + width = 12, + height = 30, + onefile = TRUE + ) + for (i in 1:length(GOTerm_parent)) { GO_Term <- GOTerm_parent[i] GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2]) @@ -519,7 +538,7 @@ for (s in 1:dim(XX3)[1]) { keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8), na.color = "red", col = brewer.pal(11, "PuOr"), main = GO_Term_Name, - #ColSideColors = ev_repeat, + # ColSideColors = ev_repeat, labRow = as.character(Genes_Annotated_to_Term$Gene) )) } @@ -528,7 +547,14 @@ for (s in 1:dim(XX3)[1]) { } if (Parent_Size >= 200 && Parent_Size <= 500) { - pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 25, onefile = TRUE) + + pdf( + file = file.path(output_dir, paste(XX3[s, 2], ".pdf", sep = "")), + width = 12, + height = 25, + onefile = TRUE + ) + for (i in 1:length(GOTerm_parent)) { GO_Term <- GOTerm_parent[i] GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2]) @@ -548,7 +574,7 @@ for (s in 1:dim(XX3)[1]) { keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8), na.color = "red", col = brewer.pal(11, "PuOr"), main = GO_Term_Name, - #ColSideColors = ev_repeat, + # ColSideColors = ev_repeat, labRow = as.character(Genes_Annotated_to_Term$Gene) )) } @@ -557,7 +583,14 @@ for (s in 1:dim(XX3)[1]) { } if (Parent_Size >= 100 && Parent_Size <= 200) { - pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 20, onefile = TRUE) + + pdf( + file = file.path(output_dir, paste(XX3[s, 2], ".pdf", sep = "")), + width = 12, + height = 20, + onefile = TRUE + 
) + for (i in 1:length(GOTerm_parent)) { GO_Term <- GOTerm_parent[i] GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2]) @@ -577,7 +610,7 @@ for (s in 1:dim(XX3)[1]) { keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8), na.color = "red", col = brewer.pal(11, "PuOr"), main = GO_Term_Name, - #ColSideColors = ev_repeat, + # ColSideColors = ev_repeat, labRow = as.character(Genes_Annotated_to_Term$Gene) )) } @@ -586,7 +619,14 @@ for (s in 1:dim(XX3)[1]) { } if (Parent_Size >= 60 && Parent_Size <= 100) { - pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 15, onefile = TRUE) + + pdf( + file = file.path(output_dir, paste(XX3[s, 2], ".pdf", sep = "")), + width = 12, + height = 15, + onefile = TRUE + ) + for (i in 1:length(GOTerm_parent)) { GO_Term <- GOTerm_parent[i] GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2]) @@ -606,7 +646,7 @@ for (s in 1:dim(XX3)[1]) { keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8), na.color = "red", col = brewer.pal(11, "PuOr"), main = GO_Term_Name, - #ColSideColors = ev_repeat, + # ColSideColors = ev_repeat, labRow = as.character(Genes_Annotated_to_Term$Gene) )) } @@ -615,7 +655,14 @@ for (s in 1:dim(XX3)[1]) { } if (Parent_Size >= 30 && Parent_Size <= 60) { - pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 10, onefile = TRUE) + + pdf( + file = file.path(output_dir, paste(XX3[s, 2], ".pdf", sep = "")), + width = 12, + height = 10, + onefile = TRUE + ) + for (i in 1:length(GOTerm_parent)) { GO_Term <- GOTerm_parent[i] GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2]) @@ -650,7 +697,7 @@ for (s in 1:dim(XX3)[1]) { keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8), na.color = "red", col = brewer.pal(11, "PuOr"), main = GO_Term_Name, - #ColSideColors = ev_repeat, + # ColSideColors = ev_repeat, labRow = 
as.character(Genes_Annotated_to_Term$Gene) )) } @@ -660,7 +707,14 @@ for (s in 1:dim(XX3)[1]) { } if (Parent_Size >= 3 && Parent_Size <= 30) { - pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 7, onefile = TRUE) + + pdf( + file = file.path(output_dir, paste(XX3[s, 2], ".pdf", sep = "")), + width = 12, + height = 7, + onefile = TRUE + ) + for (i in 1:length(GOTerm_parent)) { GO_Term <- GOTerm_parent[i] GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2]) @@ -704,7 +758,14 @@ for (s in 1:dim(XX3)[1]) { } if (Parent_Size == 2) { - pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 7, onefile = TRUE) + + pdf( + file = file.path(output_dir, paste(XX3[s, 2], ".pdf", sep = "")), + width = 12, + height = 7, + onefile = TRUE + ) + for (i in 1:length(GOTerm_parent)) { GO_Term <- GOTerm_parent[i] GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2]) diff --git a/workflow/apps/r/createHeatMapsHomology.R b/workflow/apps/r/createHeatMapsHomology.R index 029f0b7e..3d22c90a 100644 --- a/workflow/apps/r/createHeatMapsHomology.R +++ b/workflow/apps/r/createHeatMapsHomology.R @@ -1,28 +1,27 @@ #!/usr/bin/env Rscript -# This script will make homology heatmaps for the REMc analysis -# This script didn't have any hard set inputs so I didn't bother -library(RColorBrewer) -library(gplots) -library(tidyverse) +library("RColorBrewer") +library("gplots") +library("tidyverse") args <- commandArgs(TRUE) -# Need to give the input "finalTable.csv" file after running REMc generated by eclipse -inputFinalTable <- file.path(args[1]) - -# Give the DAmP_list.txt as the third argument - will color the gene names differently -DAmPs <- file.path(Args[2]) -DAmP_list <- read.delim(file = DAmPs, header = FALSE, stringsAsFactors = FALSE) - -# Give the yeast human homology mapping as the fourth argument - will add the genes to the finalTable and use info for heatmaps -mapFile <- 
file.path(Args[3]) -mapping <- read.csv(file = mapFile, stringsAsFactors = FALSE) # Define the output path for the heatmaps - create this folder first - in linux terminal in the working folder use > mkdir filename_heatmaps -outputPath <- file.path(Args[4]) +output_path <- file.path(Args[1]) + +# Need to give the input "finalTable.csv" file after running REMc generated by eclipse +final_table <- file.path(args[2]) + +# Give the damp_list.txt as the third argument - will color the gene names differently +damps <- file.path(Args[3]) +damp_list <- read.delim(file = damps, header = FALSE, stringsAsFactors = FALSE) + +# Give the yeast human homology mapping as the fourth argument - will add the genes to the finalTable and use info for heatmaps +map_file <- file.path(Args[4]) +mapping <- read.csv(file = map_file, stringsAsFactors = FALSE) # Read in finalTablewithShift -hmapfile <- data.frame(read.csv(file = inputFinalTable, header = TRUE, sep = ",", stringsAsFactors = FALSE)) +hmapfile <- data.frame(read.csv(file = final_table, header = TRUE, sep = ",", stringsAsFactors = FALSE)) # Map the finalTable to the human homolog file hmapfile_map <- hmapfile @@ -46,11 +45,11 @@ hmapfile_w_homolog <- full_join(hmapfile_map, mapping, by = c("ORFMatch" = "ense hmapfile_w_homolog <- hmapfile_w_homolog[is.na(hmapfile_w_homolog$likelihood) == FASLE, ] # Write csv with all info from mapping file -write.csv(hmapfile_w_homolog, file.path(outputPath, paste(inputFinalTable, "_WithHomologAll.csv", sep = "")), row.names = FALSE) +write.csv(hmapfile_w_homolog, file.path(output_path, paste(final_table, "_WithHomologAll.csv", sep = "")), row.names = FALSE) # Remove the non matches and output another mapping file - this is also one used to make heatmaps hmapfile_w_homolog <- hmapfile_w_homolog[is.na(hmapfile_w_homolog$external_gene_name_Human) == FALSE, ] -write.csv(hmapfile_w_homolog, file.path(outputPath, paste(inputFinalTable, "_WithHomologMatchesOnly.csv", sep = ""), row.names = FALSE)) 
+write.csv(hmapfile_w_homolog, file.path(output_path, paste(final_table, "_WithHomologMatchesOnly.csv", sep = ""), row.names = FALSE)) # Add human gene name to the Gene column hmapfile_w_homolog$Gene <- paste(hmapfile_w_homolog$Gene, hmapfile_w_homolog$external_gene_name_Human, sep = "/") @@ -176,14 +175,14 @@ if (grepl("Shift", colnames(hmapfile)[4], fixed = TRUE) == FALSE) { # m <- 0 colnames_edit <- as.character(colnames(hmapfile)[4:(length(hmapfile[1, ]) - 3)]) -colnames(DAmP_list)[1] <- "ORF" -hmapfile$DAmPs <- "YKO" +colnames(damp_list)[1] <- "ORF" +hmapfile$damps <- "YKO" colnames(hmapfile)[2] <- "ORF" -try(hmapfile[hmapfile$ORF %in% DAmP_list$ORF, ]$DAmPs <- "YKD") -# X <- X[order(X$DAmPs,decreasing = TRUE),] +try(hmapfile[hmapfile$ORF %in% damp_list$ORF, ]$damps <- "YKD") +# X <- X[order(X$damps,decreasing = TRUE),] hmapfile$color2 <- NA -try(hmapfile[hmapfile$DAmPs == "YKO", ]$color2 <- "black") -try(hmapfile[hmapfile$DAmPs == "YKD", ]$color2 <- "red") +try(hmapfile[hmapfile$damps == "YKO", ]$color2 <- "black") +try(hmapfile[hmapfile$damps == "YKD", ]$color2 <- "red") hmapfile$color <- NA try(hmapfile[hmapfile$hsapiens_homolog_orthology_type == "ortholog_many2many", ]$color <- "#F8766D") @@ -231,7 +230,7 @@ for (i in 1:num_unique_clusts) { if (cluster_length != 1) { X0 <- as.matrix(cluster_data[, 4:(length(hmapfile[1, ]) - 6)]) if (cluster_length >= 2001) { - mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = "")) + mypath <- file.path(output_path, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = "")) pdf(file = mypath, height = 20, width = 15) heatmap.2( x = X0, @@ -251,7 +250,7 @@ for (i in 1:num_unique_clusts) { dev.off() } if (cluster_length >= 201 && cluster_length <= 2000) { - mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = "")) + mypath <- file.path(output_path, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = "")) pdf(file = mypath, height = 15, width = 12) 
heatmap.2( x = X0, @@ -270,7 +269,7 @@ for (i in 1:num_unique_clusts) { dev.off() } if (cluster_length >= 150 && cluster_length <= 200) { - mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = "")) + mypath <- file.path(output_path, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = "")) pdf(file = mypath, height = 12, width = 12) heatmap.2( x = X0, @@ -288,7 +287,7 @@ for (i in 1:num_unique_clusts) { dev.off() } if (cluster_length >= 101 && cluster_length <= 149) { - mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = "")) + mypath <- file.path(output_path, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = "")) pdf(file = mypath, height = 12, width = 12) heatmap.2( x = X0, @@ -306,7 +305,7 @@ for (i in 1:num_unique_clusts) { dev.off() } if (cluster_length >= 60 && cluster_length <= 100) { - mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = "")) + mypath <- file.path(output_path, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = "")) pdf(file = mypath, height = 12, width = 12) heatmap.2( x = X0, @@ -324,7 +323,7 @@ for (i in 1:num_unique_clusts) { dev.off() } if (cluster_length <= 59 && cluster_length >= 30) { - mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = "")) + mypath <- file.path(output_path, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = "")) pdf(file = mypath, height = 9, width = 12) heatmap.2( x = X0, @@ -342,7 +341,7 @@ for (i in 1:num_unique_clusts) { dev.off() } if (cluster_length <= 29) { - mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = "")) + mypath <- file.path(output_path, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = "")) pdf(file = mypath, height = 7, width = 12) heatmap.2( x = X0, diff --git a/workflow/apps/r/gtaTemplate.R b/workflow/apps/r/gtaTemplate.R index af1db83c..d94f04a8 100644 --- a/workflow/apps/r/gtaTemplate.R +++ 
b/workflow/apps/r/gtaTemplate.R @@ -50,7 +50,7 @@ if (length(args) >= 5) { # ZScores_Interaction.csv for (m in 1:length(zscores_file)) { - #zscores_file <- paste(Wstudy,"/",expName[m],'/ZScores/ZScores_Interaction.csv',sep="") #ArgsScore[1] + # zscores_file <- paste(Wstudy,"/",expName[m],'/ZScores/ZScores_Interaction.csv',sep="") #ArgsScore[1] X <- read.csv(file = zscores_file[m], stringsAsFactors = FALSE, header = TRUE) if (colnames(X)[1] == "OrfRep") { diff --git a/workflow/apps/r/interactions.R b/workflow/apps/r/interactions.R index 05fea564..9dd69c95 100644 --- a/workflow/apps/r/interactions.R +++ b/workflow/apps/r/interactions.R @@ -3,139 +3,104 @@ # 1. Path to input easy results file # 2. /output/ directory # 3. Path to StudyInfo.csv -# 4. Path to SGDgeneList -# 5. Standard deviation value -# 6. The experiment number (Exp# directory) +# 4. Path to sgd_gene_list +# 5. The experiment number (Exp# directory) +# 6. Standard deviation value -library(ggplot2) -library(plyr) -library(extrafont) -library(gridExtra) -library(gplots) -library(RColorBrewer) -library(stringr) -library(gdata) -library(plotly) -library(htmlwidgets) +library("ggplot2") +library("plyr") +library("extrafont") +library("gridExtra") +library("gplots") +library("RColorBrewer") +library("stringr") +library("gdata") +library("plotly") +library("htmlwidgets") # Parse arguments args <- commandArgs(TRUE) -inputFile <- file.path(args[1]) +exp_number <- as.numeric(args[1]) +delta_bg_factor <- as.numeric(args[2]) +study_info_file <- file.path(args[3]) +sgd_gene_list <- file.path(args[4]) +input_file <- file.path(args[5]) +out_dir <- file.path(args[6]) -# Set output dir -if (length(args) >= 2) { - outDir <- file.path(args[2]) -} else { - outDir <- "/ZScores/" # for legacy workflow +sprintf("The Standard Deviation value is: %f", delta_bg_factor) + +out_dir_qc <- file.path(out_dir, "qc") + +if (!dir.exists(out_dir)) { + dir.create(out_dir) } -# Set StudyInfo file path -if (length(args) >= 3) { - studyInfo 
<- file.path(args[3]) -} else { - studyInfo <- "../Code/StudyInfo.csv" # for legacy workflow -} - -# Set SGDgeneList file path -if (length(args) >= 4) { - SGDgeneList <- file.path(args[4]) -} else { - SGDgeneList <- "../Code/SGD_features.tab" # for legacy workflow -} - -# Set standard deviation -if (length(args) >= 5) { - delBGFactor <- args[5] -} else { - # User prompt for std multiplier Value - print("Enter a Standard Deviation value for noise filter") - print("Sean Santos recommends 3 or 5") - delBGFactor <- readLines(file("stdin"), n = 1L) -} -delBGFactor <- as.numeric(delBGFactor) -if (is.na(delBGFactor)) { - delBGFactor <- 3 # recommended by Sean -} -print(paste("The Standard Deviation Value is:", delBGFactor)) - -# Set experiment # -if (length(args) >= 6) { - expNumber <- args[6] -} else { - # User prompt for std multiplier Value - print("Enter the experiment number (Exp# directory)") - expNumber <- readLines(file("stdin"), n = 1L) -} -expNumber <- as.numeric(expNumber) - -outDir_QC <- file.path(outDir, "QC") - -if (!file.exists(outDir)) { - dir.create(outDir) -} - -if (!file.exists(outDir_QC)) { - dir.create(file.path(outDir_QC)) +if (!dir.exists(out_dir_qc)) { + dir.create(out_dir_qc) } options(width = 1000) ls.str() -# Write delBJFactor to the StudyInfo file +# Write delBGFactor to the StudyInfo file # TODO we probably shouldn't be doing this, need one source of truth # TODO disabling this for now -# Labels <- read.csv(file = studyInfo, stringsAsFactors = FALSE) # sep = "," -# Labels[expNumber, 3] <- delBGFactor -# write.csv(Labels, file = studyInfo, row.names = FALSE) +# labels <- read.csv(file = study_info_file, stringsAsFactors = FALSE) # sep = "," +# labels[exp_number, 3] <- delta_bg_factor +# write.csv(Labels, file = study_info_file, row.names = FALSE) # Begin User Data Selection Section # Read in the data -X <- read.delim(inputFile, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE) -X <- X[!(X[[1]] %in% c("", "Scan")), ] -# X <- 
X[!(X[[1]]%in%c(61:76)), ] #Remove dAmp plates which are Scans 61 thru 76 -# X <- X[which(X$Specifics == "WT"), ] -# X_length <- length(X[1, ]) -# X_end <- length(X[1, ]) - 2 -# X <- X[, c(1:42, X_end:X_length)] +df <- read.delim(input_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE) +df <- df[!(df[[1]] %in% c("", "Scan")), ] +# df <- df[!(df[[1]]%in%c(61:76)), ] # remove dAmp plates which are Scans 61 thru 76 +# df <- df[which(df$Specifics == "WT"), ] +# df_length <- length(df[1, ]) +# df_end <- length(df[1, ]) - 2 +# df <- df[, c(1:42, df_end:df_length)] + +# print(names(df)) # Use numeric data to perform operations -X$Col <- as.numeric(X$Col) -X$Row <- as.numeric(X$Row) -X$l <- as.numeric(X$l) -X$K <- as.numeric(X$K) -X$r <- as.numeric(X$r) -X$Scan <- as.numeric(X$Scan) -X$AUC <- as.numeric(X$AUC) -X$LstBackgrd <- as.numeric(X$LstBackgrd) -X$X1stBackgrd <- as.numeric(X$X1stBackgrd) +df$col <- as.numeric(df$Col) +df$row <- as.numeric(df$Row) +df$l <- as.numeric(df$l) +df$k <- as.numeric(df$K) +df$r <- as.numeric(df$r) +df$scan <- as.numeric(df$Scan) +df$auc <- as.numeric(df$AUC) +df$last_bg <- as.numeric(df$LstBackgrd) +df$first_bg <- as.numeric(df$X1stBackgrd) + +# print(df) # Sometimes the non-varying drug is in the 'Drug' col vs the 'Modifier1' col # as was the case in Gemcitabin and Cytarabin experiments. # The following allows user to rename columns so as to get the appropriate # data where it needs to be for the script to run properly. -# colnames(X)[7] <- "Modifier1" -# colnames(X)[8] <- "Conc1" -# colnames(X)[10] <- "Drug" -# colnames(X)[11] <- "Conc" +# colnames(df)[7] <- "Modifier1" +# colnames(df)[8] <- "Conc1" +# colnames(df)[10] <- "Drug" +# colnames(df)[11] <- "Conc" # Set the OrfRep to YDL227C for the ref data -X[X$ORF == "YDL227C", ]$OrfRep <- "YDL227C" +df[df$ORF == "YDL227C", ]$OrfRep <- "YDL227C" # Sean removes the Doxycyclin at 0.0ug.mL so that only the Oligomycin series with Doxycyclin of 0.12ug/mL are used. 
# That is the first DM plates are removed from the data set with the following. # This removes data with dox == 0 leaving gene expression on with four different concentrations of Gemcytabin -# X <- X[X$Conc1 != "0ug/ml", ] -X <- X[X$Drug != "BMH21", ] # this removes data concerning BMH21 for this experiment +# df <- df[df$Conc1 != "0ug/ml", ] +df <- df[df$Drug != "BMH21", ] # this removes data concerning BMH21 for this experiment # Mert placed the "bad_spot" text in the ORF col. for particular spots in the RF1 and RF2 plates. # This code removes those spots from the data set used for the interaction analysis. # Dr.Hartman feels that these donot effect Zscores significantly and so "non-curated" files were used. -# try(X <- X[X$ORF != "bad_spot", ]) +# try(df <- df[df$ORF != "bad_spot", ]) # Get total number of drug concentrations -total_conc_nums <- length(unique(X$Conc)) +total_conc_nums <- length(unique(df$Conc)) # Function to ID numbers in string with characters+numbers (ie to get numeric drug conc) numextract <- function(string) { @@ -143,53 +108,49 @@ numextract <- function(string) { } # Generate a new column with the numeric drug concs -X$Conc_Num <- as.numeric(numextract(X$Conc)) +df$conc_num <- as.numeric(numextract(df$Conc)) # Generate new column with the numeric drug concs as factors starting at 0 for the graphing later -X$Conc_Num_Factor <- as.numeric(as.factor(X$Conc_Num)) - 1 +df$conc_num_factor <- as.numeric(as.factor(df$conc_num)) - 1 # Get the max factor for concentration -MAX_CONC <- max(X$Conc_Num_Factor) +max_conc <- max(df$conc_num_factor) # If treating numbers not as factors uncomment next line and comment out previous line -# MAX_CONC <- max(X$Conc_Num) +# max_conc <- max(df$conc_num) # Remove wells with problems for making graphs and to not include in summary statistics -X <- X[X$Gene != "BLANK", ] -X <- X[X$Gene != "Blank", ] -X <- X[X$ORF != "Blank", ] -X <- X[X$Gene != "blank", ] -# X <- X[X$Gene != "HO", ] -Xbu <- X +df <- df[df$Gene != 
"BLANK", ] +df <- df[df$Gene != "Blank", ] +df <- df[df$ORF != "Blank", ] +df <- df[df$Gene != "blank", ] +# df <- df[df$Gene != "HO", ] -# Use SGDgenelist to update orfs and replace empty geneName cells with ORF name (adapted from Sean's Merge script). +# Use sgd_gene_list to update orfs and replace empty geneName cells with ORF name (adapted from Sean's Merge script). # This is to 'fix' the naming for everything that follows (REMc, Heatmaps ... et.al) rather than do it piece meal later # Sean's Match Script( which was adapted here) was fixed 2022_0608 so as not to overwrite the RF1&RF2 geneNames -# in the Z_lm_L, K, r&AUC output values. Values correlated well but were off by a multiplier factor. +# in the z_lm_l, k, r&auc output values. Values correlated well but were off by a multiplier factor. genes <- data.frame(read.delim( - file = SGDgeneList, quote = "", header = FALSE, colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11)))) -for (i in 1:length(X[, 14])) { + file = sgd_gene_list, quote = "", header = FALSE, colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11)))) +for (i in 1:length(df[, 14])) { ii <- as.numeric(i) - line_num <- match(X[ii, 14], genes[, 1], nomatch = 1) - OrfRepColNum <- as.numeric(match("OrfRep", names(X))) - if (X[ii, OrfRepColNum] != "YDL227C") { - X[ii, 15] <- genes[line_num, 2] + line_num <- match(df[ii, 14], genes[, 1], nomatch = 1) + orf_rep_col_num <- as.numeric(match("OrfRep", names(df))) + if (df[ii, orf_rep_col_num] != "YDL227C") { + df[ii, 15] <- genes[line_num, 2] } - if ((X[ii, 15] == "") || (X[ii, 15] == "OCT1")) { - X[ii, 15] <- X[ii, OrfRepColNum] + if ((df[ii, 15] == "") || (df[ii, 15] == "OCT1")) { + df[ii, 15] <- df[ii, orf_rep_col_num] } } -Xblankreplace <- X -# X = Xbu # for restore testing restore X if geneName 'Match' routine needs changing - # Remove dAmPs # jlh confirmed to leave dAmps in so comment out this section -# DAmPs_List <- "../Code/22_0602_Remy_DAmPsList.txt" -# Damps <- 
read.delim(DAmPs_List, header = F) +# DAmPs_list <- "../Code/22_0602_Remy_DAmPsList.txt" +# Damps <- read.delim(DAmPs_list, header = F) -# X <- X[!(X$ORF %in% Damps$V1), ] # fix this to Damps[, 1] -# XafterDampsRM = X # backup for debugging +# df <- df[!(df$ORF %in% Damps$V1), ] # fix this to Damps[, 1] +# dfafterDampsRM = df # backup for debugging # Begin Graphics Boiler Plate Section # theme elements for plots @@ -231,7 +192,7 @@ scale_fill_publication <- function(...) { values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...) } -scale_colour_Publication <- function(...) { +scale_colour_publication <- function(...) { discrete_scale("colour", "Publication", manual_pal( values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...) } @@ -269,7 +230,7 @@ scale_fill_publication <- function(...) { values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...) } -scale_colour_Publication <- function(...) { +scale_colour_publication <- function(...) { discrete_scale("colour", "Publication", manual_pal( values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...) 
} @@ -285,8 +246,8 @@ timestamp() # Plate analysis plot # Plate analysis is a quality check to identify plate effects containing anomalies -Plate_Analysis_L <- - ggplot(X, aes(Scan, l, color = as.factor(Conc_Num))) + +plate_analysis_l <- + ggplot(df, aes(Scan, l, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, @@ -297,8 +258,8 @@ Plate_Analysis_L <- stat_summary(fun = mean, geom = "point", size = 0.6) + ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_publication() -Plate_Analysis_K <- - ggplot(X, aes(Scan, K, color = as.factor(Conc_Num))) + +plate_analysis_k <- + ggplot(df, aes(Scan, k, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, @@ -309,8 +270,8 @@ Plate_Analysis_K <- stat_summary(fun = mean, geom = "point", size = 0.6) + ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_publication() -Plate_Analysis_r <- - ggplot(X, aes(Scan, r, color = as.factor(Conc_Num))) + +plate_analysis_r <- + ggplot(df, aes(Scan, r, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, @@ -321,8 +282,8 @@ Plate_Analysis_r <- stat_summary(fun = mean, geom = "point", size = 0.6) + ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_publication() -Plate_Analysis_AUC <- - ggplot(X, aes(Scan, AUC, color = as.factor(Conc_Num))) + +plate_analysis_auc <- + ggplot(df, aes(Scan, auc, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, @@ -331,61 +292,61 @@ Plate_Analysis_AUC <- geom = "errorbar" ) + stat_summary(fun = mean, geom = "point", size = 0.6) + - ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_publication() + ggtitle("Plate analysis by Drug Conc for auc before quality control") + theme_publication() -Plate_Analysis_L_Box <- - ggplot(X, aes(as.factor(Scan), l, color = as.factor(Conc_Num))) + 
+plate_analysis_l_box <- + ggplot(df, aes(as.factor(Scan), l, color = as.factor(conc_num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_publication() -Plate_Analysis_K_Box <- - ggplot(X, aes(as.factor(Scan), K, color = as.factor(Conc_Num))) + +plate_analysis_k_box <- + ggplot(df, aes(as.factor(Scan), k, color = as.factor(conc_num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_publication() -Plate_Analysis_r_Box <- - ggplot(X, aes(as.factor(Scan), r, color = as.factor(Conc_Num))) + +plate_analysis_r_box <- + ggplot(df, aes(as.factor(Scan), r, color = as.factor(conc_num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_publication() -Plate_Analysis_AUC_Box <- - ggplot(X, aes(as.factor(Scan), AUC, color = as.factor(Conc_Num))) + +plate_analysis_auc_box <- + ggplot(df, aes(as.factor(Scan), auc, color = as.factor(conc_num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC before quality control") + + ggtitle("Plate analysis by Drug Conc for auc before quality control") + theme_publication() # Quality control - values with a high delta background likely have heavy contamination # Check the frequency of these values -# Report the L and K values of these spots -# Report the number to be removed based on the Delta_Background_Tolerance -X$Delta_Backgrd <- X$LstBackgrd - X$X1stBackgrd +# Report the L and k values of these spots +# Report the number to be removed based on the delta_background_tolerance +df$delta_bg <- df$last_bg - df$first_bg -# Raw l vs K before QC -Raw_l_vs_K_beforeQC <- - ggplot(X, aes(l, K, color = as.factor(Conc_Num))) + - geom_point(aes(ORF = ORF, Gene = Gene, Delta_Backgrd = Delta_Backgrd), shape = 3) + +# Raw l vs k before QC +raw_l_vs_k_before_qc <- + ggplot(df, aes(l, k, color = as.factor(conc_num))) + + geom_point(aes(ORF = ORF, Gene = Gene, delta_bg = delta_bg), shape = 3) + 
ggtitle("Raw L vs K before QC") + theme_publication_legend_right() -pdf(file.path(outDir_QC, "Raw_L_vs_K_beforeQC.pdf"), width = 12, height = 8) -Raw_l_vs_K_beforeQC +pdf(file.path(out_dir_qc, "raw_l_vs_k_before_qc.pdf"), width = 12, height = 8) +raw_l_vs_k_before_qc dev.off() -pgg <- ggplotly(Raw_l_vs_K_beforeQC) -plotly_path <- file.path(outDir_QC, "Raw_L_vs_K_beforeQC.html") +pgg <- ggplotly(raw_l_vs_k_before_qc) +plotly_path <- file.path(out_dir_qc, "raw_l_vs_k_before_qc.html") saveWidget(pgg, file = plotly_path, selfcontained = TRUE) # Set delta background tolerance based on 3 sds from the mean delta background -Delta_Background_Tolerance <- mean(X$Delta_Backgrd) + (delBGFactor * sd(X$Delta_Backgrd)) -# Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(3*sd(X$Delta_Backgrd)) -sprintf("Delta_Background_Tolerance is %f", Delta_Background_Tolerance) +delta_background_tolerance <- mean(df$delta_bg) + (delta_bg_factor * sd(df$delta_bg)) +# delta_background_tolerance <- mean(df$delta_bg)+(3*sd(df$delta_bg)) +sprintf("delta_background_tolerance is %f", delta_background_tolerance) -Plate_Analysis_Delta_Backgrd <- - ggplot(X, aes(Scan, Delta_Backgrd, color = as.factor(Conc_Num))) + +plate_analysis_delta_bg <- + ggplot(df, aes(Scan, delta_bg, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2, position = "jitter") + stat_summary( fun = mean, @@ -394,75 +355,75 @@ Plate_Analysis_Delta_Backgrd <- geom = "errorbar" ) + stat_summary(fun = mean, geom = "point", size = 0.6) + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + + ggtitle("Plate analysis by Drug Conc for delta_bg before quality control") + theme_publication() -Plate_Analysis_Delta_Backgrd_Box <- - ggplot(X, aes(as.factor(Scan), Delta_Backgrd, color = as.factor(Conc_Num))) + +plate_analysis_delta_bg_box <- + ggplot(df, aes(as.factor(Scan), delta_bg, color = as.factor(conc_num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before 
quality control") + + ggtitle("Plate analysis by Drug Conc for delta_bg before quality control") + theme_publication() -X_Delta_Backgrd_above_Tolerance <- - X[X$Delta_Backgrd >= Delta_Background_Tolerance, ] -X_Delta_Backgrd_above_Tolerance_K_halfmedian <- - (median(X_Delta_Backgrd_above_Tolerance$K, na.rm = TRUE)) / 2 -X_Delta_Backgrd_above_Tolerance_L_halfmedian <- - (median(X_Delta_Backgrd_above_Tolerance$l, na.rm = TRUE)) / 2 -X_Delta_Backgrd_above_Tolerance_toRemove <- - dim(X_Delta_Backgrd_above_Tolerance)[1] +x_delta_bg_above_tolerance <- + df[df$delta_bg >= delta_background_tolerance, ] +x_delta_bg_above_tolerance_k_halfmedian <- + (median(x_delta_bg_above_tolerance$k, na.rm = TRUE)) / 2 +x_delta_bg_above_tolerance_l_halfmedian <- + (median(x_delta_bg_above_tolerance$l, na.rm = TRUE)) / 2 +x_delta_bg_above_tolerance_to_remove <- + dim(x_delta_bg_above_tolerance)[1] -X_Delta_Backgrd_above_Tolerance_L_vs_K <- - ggplot(X_Delta_Backgrd_above_Tolerance, aes(l, K, color = as.factor(Conc_Num))) + - geom_point(aes(ORF = ORF, Gene = Gene, Delta_Backgrd = Delta_Backgrd), shape = 3) + - ggtitle(paste("Raw L vs K for strains above delta background threshold of", Delta_Background_Tolerance, "or above")) + - annotate("text", x = X_Delta_Backgrd_above_Tolerance_L_halfmedian, y = X_Delta_Backgrd_above_Tolerance_K_halfmedian, - label = paste("Strains above delta background tolerance = ", X_Delta_Backgrd_above_Tolerance_toRemove) +x_delta_bg_above_tolerance_l_vs_k <- + ggplot(x_delta_bg_above_tolerance, aes(l, k, color = as.factor(conc_num))) + + geom_point(aes(ORF = ORF, Gene = Gene, delta_bg = delta_bg), shape = 3) + + ggtitle(paste("Raw L vs K for strains above delta background threshold of", delta_background_tolerance, "or above")) + + annotate("text", x = x_delta_bg_above_tolerance_l_halfmedian, y = x_delta_bg_above_tolerance_k_halfmedian, + label = paste("Strains above delta background tolerance = ", x_delta_bg_above_tolerance_to_remove) ) + 
theme_publication_legend_right() -pdf(file.path(outDir_QC, "Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.pdf"), width = 12, height = 8) -X_Delta_Backgrd_above_Tolerance_L_vs_K +pdf(file.path(out_dir_qc, "raw_l_vs_k_for_strains_above_delta_background_threshold.pdf"), width = 12, height = 8) +x_delta_bg_above_tolerance_l_vs_k dev.off() -pgg <- ggplotly(X_Delta_Backgrd_above_Tolerance_L_vs_K) -plotly_path <- file.path(outDir_QC, "Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.html") +pgg <- ggplotly(x_delta_bg_above_tolerance_l_vs_k) +plotly_path <- file.path(out_dir_qc, "raw_l_vs_k_for_strains_above_delta_background_threshold.html") saveWidget(pgg, file = plotly_path, selfcontained = TRUE) # Frequency plot for all data vs. the delta_background -DeltaBackground_Frequency_Plot <- ggplot(X, aes(Delta_Backgrd, color = as.factor(Conc_Num))) + geom_density() + +delta_bg_frequency_plot <- ggplot(df, aes(delta_bg, color = as.factor(conc_num))) + geom_density() + ggtitle("Density plot for Delta Background by Conc All Data") + theme_publication_legend_right() # Bar plot for all data vs. 
the delta_background -DeltaBackground_Bar_Plot <- ggplot(X, aes(Delta_Backgrd, color = as.factor(Conc_Num))) + geom_bar() + +delta_bg_bar_plot <- ggplot(df, aes(delta_bg, color = as.factor(conc_num))) + geom_bar() + ggtitle("Bar plot for Delta Background by Conc All Data") + theme_publication_legend_right() -pdf(file.path(outDir_QC, "Frequency_Delta_Background.pdf"), width = 12, height = 8) -print(DeltaBackground_Frequency_Plot) -print(DeltaBackground_Bar_Plot) +pdf(file.path(out_dir_qc, "frequency_delta_background.pdf"), width = 12, height = 8) +print(delta_bg_frequency_plot) +print(delta_bg_bar_plot) dev.off() # Need to identify missing data, and differentiate between this data and removed data # so the removed data can get set to NA and the missing data can get set to max theoretical values # 1 for missing data, 0 for non missing data # Use "NG" for NoGrowth rather than "missing" -X$NG <- 0 -try(X[X$l == 0 & !is.na(X$l), ]$NG <- 1) +df$NG <- 0 +try(df[df$l == 0 & !is.na(df$l), ]$NG <- 1) # 1 for removed data, 0 non removed data # Use DB to identify number of genes removed due to the DeltaBackground Threshold rather than "Removed" -X$DB <- 0 -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance, ]$DB <- 1) +df$DB <- 0 +try(df[df$delta_bg >= delta_background_tolerance, ]$DB <- 1) -# Replace the CPPs for l, r, AUC and K (must be last!) for removed data -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance, ]$l <- NA) -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance, ]$r <- NA) -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance, ]$AUC <- NA) -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance, ]$K <- NA) +# Replace the CPPs for l, r, auc and k (must be last!) 
for removed data +try(df[df$delta_bg >= delta_background_tolerance, ]$l <- NA) +try(df[df$delta_bg >= delta_background_tolerance, ]$r <- NA) +try(df[df$delta_bg >= delta_background_tolerance, ]$auc <- NA) +try(df[df$delta_bg >= delta_background_tolerance, ]$k <- NA) # QC Plots -Plate_Analysis_L_afterQC <- ggplot(X, aes(Scan, l, color = as.factor(Conc_Num))) + geom_point(shape = 3, size = 0.2) + +plate_analysis_l_after_qc <- ggplot(df, aes(Scan, l, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), @@ -476,8 +437,8 @@ Plate_Analysis_L_afterQC <- ggplot(X, aes(Scan, l, color = as.factor(Conc_Num))) ) + ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_publication() -Plate_Analysis_K_afterQC <- - ggplot(X, aes(Scan, K, color = as.factor(Conc_Num))) + +plate_analysis_k_after_qc <- + ggplot(df, aes(Scan, k, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, @@ -490,10 +451,10 @@ Plate_Analysis_K_afterQC <- geom = "point", size = 0.6 ) + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_publication() + ggtitle("Plate analysis by Drug Conc for k after quality control") + theme_publication() -Plate_Analysis_r_afterQC <- - ggplot(X, aes(Scan, r, color = as.factor(Conc_Num))) + +plate_analysis_r_after_qc <- + ggplot(df, aes(Scan, r, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, @@ -508,8 +469,8 @@ Plate_Analysis_r_afterQC <- ) + ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_publication() -Plate_Analysis_AUC_afterQC <- - ggplot(X, aes(Scan, AUC, color = as.factor(Conc_Num))) + +plate_analysis_auc_after_qc <- + ggplot(df, aes(Scan, auc, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, @@ -522,10 +483,10 @@ Plate_Analysis_AUC_afterQC <- geom = "point", size = 0.6 ) + - ggtitle("Plate 
analysis by Drug Conc for AUC after quality control") + theme_publication() + ggtitle("Plate analysis by Drug Conc for auc after quality control") + theme_publication() -Plate_Analysis_Delta_Backgrd_afterQC <- - ggplot(X, aes(Scan, Delta_Backgrd, color = as.factor(Conc_Num))) + +plate_analysis_delta_bg_after_qc <- + ggplot(df, aes(Scan, delta_bg, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, @@ -538,71 +499,71 @@ Plate_Analysis_Delta_Backgrd_afterQC <- geom = "point", size = 0.6 ) + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + + ggtitle("Plate analysis by Drug Conc for delta_bg after quality control") + theme_publication() -Plate_Analysis_L_Box_afterQC <- - ggplot(X, aes(as.factor(Scan), l, color = as.factor(Conc_Num))) + +plate_analysis_l_box_after_qc <- + ggplot(df, aes(as.factor(Scan), l, color = as.factor(conc_num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_publication() -Plate_Analysis_K_Box_afterQC <- - ggplot(X, aes(as.factor(Scan), K, color = as.factor(Conc_Num))) + +plate_analysis_k_box_after_qc <- + ggplot(df, aes(as.factor(Scan), k, color = as.factor(conc_num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_publication() -Plate_Analysis_r_Box_afterQC <- - ggplot(X, aes(as.factor(Scan), r, color = as.factor(Conc_Num))) + +plate_analysis_r_box_after_qc <- + ggplot(df, aes(as.factor(Scan), r, color = as.factor(conc_num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_publication() -Plate_Analysis_AUC_Box_afterQC <- - ggplot(X, aes(as.factor(Scan), AUC, color = as.factor(Conc_Num))) + +plate_analysis_auc_box_after_qc <- + ggplot(df, aes(as.factor(Scan), auc, color = as.factor(conc_num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + + ggtitle("Plate analysis by Drug Conc for auc 
after quality control") + theme_publication() -Plate_Analysis_Delta_Backgrd_Box_afterQC <- - ggplot(X, aes(as.factor(Scan), Delta_Backgrd, color = as.factor(Conc_Num))) + +plate_analysis_delta_bg_box_after_qc <- + ggplot(df, aes(as.factor(Scan), delta_bg, color = as.factor(conc_num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + + ggtitle("Plate analysis by Drug Conc for delta_bg after quality control") + theme_publication() # Print the plate analysis data before and after QC -pdf(file.path(outDir_QC, "Plate_Analysis.pdf"), width = 14, height = 9) -Plate_Analysis_L -Plate_Analysis_L_afterQC -Plate_Analysis_K -Plate_Analysis_K_afterQC -Plate_Analysis_r -Plate_Analysis_r_afterQC -Plate_Analysis_AUC -Plate_Analysis_AUC_afterQC -Plate_Analysis_Delta_Backgrd -Plate_Analysis_Delta_Backgrd_afterQC +pdf(file.path(out_dir_qc, "plate_analysis.pdf"), width = 14, height = 9) +plate_analysis_l +plate_analysis_l_after_qc +plate_analysis_k +plate_analysis_k_after_qc +plate_analysis_r +plate_analysis_r_after_qc +plate_analysis_auc +plate_analysis_auc_after_qc +plate_analysis_delta_bg +plate_analysis_delta_bg_after_qc dev.off() # Print the plate analysis data before and after QC -pdf(file.path(outDir_QC, "Plate_Analysis_Boxplots.pdf"), width = 18, height = 9) -Plate_Analysis_L_Box -Plate_Analysis_L_Box_afterQC -Plate_Analysis_K_Box -Plate_Analysis_K_Box_afterQC -Plate_Analysis_r_Box -Plate_Analysis_r_Box_afterQC -Plate_Analysis_AUC_Box -Plate_Analysis_AUC_Box_afterQC -Plate_Analysis_Delta_Backgrd_Box -Plate_Analysis_Delta_Backgrd_Box_afterQC +pdf(file.path(out_dir_qc, "plate_analysis_boxplots.pdf"), width = 18, height = 9) +plate_analysis_l_box +plate_analysis_l_box_after_qc +plate_analysis_k_box +plate_analysis_k_box_after_qc +plate_analysis_r_box +plate_analysis_r_box_after_qc +plate_analysis_auc_box +plate_analysis_auc_box_after_qc +plate_analysis_delta_bg_box +plate_analysis_delta_bg_box_after_qc dev.off() # Remove the 
zero values and print plate analysis -X_noZero <- X[which(X$l > 0), ] -Plate_Analysis_L_afterQC_Z <- - ggplot(X_noZero, aes(Scan, l, color = as.factor(Conc_Num))) + +x_no_zero <- df[which(df$l > 0), ] +plate_analysis_l_after_qc_z <- + ggplot(x_no_zero, aes(Scan, l, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, @@ -618,8 +579,8 @@ Plate_Analysis_L_afterQC_Z <- ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_publication() -Plate_Analysis_K_afterQC_Z <- - ggplot(X_noZero, aes(Scan, K, color = as.factor(Conc_Num))) + +plate_analysis_k_after_qc_z <- + ggplot(x_no_zero, aes(Scan, k, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, @@ -635,8 +596,8 @@ Plate_Analysis_K_afterQC_Z <- ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_publication() -Plate_Analysis_r_afterQC_Z <- - ggplot(X_noZero, aes(Scan, r, color = as.factor(Conc_Num))) + +plate_analysis_r_after_qc_z <- + ggplot(x_no_zero, aes(Scan, r, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, @@ -652,8 +613,8 @@ Plate_Analysis_r_afterQC_Z <- ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_publication() -Plate_Analysis_AUC_afterQC_Z <- - ggplot(X_noZero, aes(Scan, AUC, color = as.factor(Conc_Num))) + +plate_analysis_auc_after_qc_z <- + ggplot(x_no_zero, aes(Scan, auc, color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, @@ -666,11 +627,11 @@ Plate_Analysis_AUC_afterQC_Z <- geom = "point", size = 0.6 ) + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + + ggtitle("Plate analysis by Drug Conc for auc after quality control") + theme_publication() -Plate_Analysis_Delta_Backgrd_afterQC_Z <- - ggplot(X_noZero, aes(Scan, Delta_Backgrd, color = as.factor(Conc_Num))) + +plate_analysis_delta_bg_after_qc_z <- + ggplot(x_no_zero, aes(Scan, delta_bg, 
color = as.factor(conc_num))) + geom_point(shape = 3, size = 0.2) + stat_summary( fun = mean, @@ -683,123 +644,123 @@ Plate_Analysis_Delta_Backgrd_afterQC_Z <- geom = "point", size = 0.6 ) + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + + ggtitle("Plate analysis by Drug Conc for delta_bg after quality control") + theme_publication() -Plate_Analysis_L_Box_afterQC_Z <- - ggplot(X_noZero, aes(as.factor(Scan), l, color = as.factor(Conc_Num))) + +plate_analysis_l_box_after_qc_z <- + ggplot(x_no_zero, aes(as.factor(Scan), l, color = as.factor(conc_num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_publication() -Plate_Analysis_K_Box_afterQC_Z <- - ggplot(X_noZero, aes(as.factor(Scan), K, color = as.factor(Conc_Num))) + +plate_analysis_k_box_after_qc_z <- + ggplot(x_no_zero, aes(as.factor(Scan), k, color = as.factor(conc_num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_publication() -Plate_Analysis_r_Box_afterQC_Z <- - ggplot(X_noZero, aes(as.factor(Scan), r, color = as.factor(Conc_Num))) + +plate_analysis_r_box_after_qc_z <- + ggplot(x_no_zero, aes(as.factor(Scan), r, color = as.factor(conc_num))) + geom_boxplot() + ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_publication() -Plate_Analysis_AUC_Box_afterQC_Z <- - ggplot(X_noZero, aes(as.factor(Scan), AUC, color = as.factor(Conc_Num))) + +plate_analysis_auc_box_after_qc_z <- + ggplot(x_no_zero, aes(as.factor(Scan), auc, color = as.factor(conc_num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + + ggtitle("Plate analysis by Drug Conc for auc after quality control") + theme_publication() -Plate_Analysis_Delta_Backgrd_Box_afterQC_Z <- - ggplot(X_noZero, aes(as.factor(Scan), Delta_Backgrd, color = as.factor(Conc_Num))) + +plate_analysis_delta_bg_box_after_qc_z <- + ggplot(x_no_zero, aes(as.factor(Scan), delta_bg, 
color = as.factor(conc_num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + + ggtitle("Plate analysis by Drug Conc for delta_bg after quality control") + theme_publication() # Print the plate analysis data before and after QC -pdf(file.path(outDir_QC, "Plate_Analysis_noZeros.pdf"), width = 14, height = 9) -Plate_Analysis_L_afterQC_Z -Plate_Analysis_K_afterQC_Z -Plate_Analysis_r_afterQC_Z -Plate_Analysis_AUC_afterQC_Z -Plate_Analysis_Delta_Backgrd_afterQC_Z +pdf(file.path(out_dir_qc, "plate_analysis_no_zeros.pdf"), width = 14, height = 9) +plate_analysis_l_after_qc_z +plate_analysis_k_after_qc_z +plate_analysis_r_after_qc_z +plate_analysis_auc_after_qc_z +plate_analysis_delta_bg_after_qc_z dev.off() # Print the plate analysis data before and after QC -pdf(file.path(outDir_QC, "Plate_Analysis_noZeros_Boxplots.pdf"), width = 18, height = 9) -Plate_Analysis_L_Box_afterQC_Z -Plate_Analysis_K_Box_afterQC_Z -Plate_Analysis_r_Box_afterQC_Z -Plate_Analysis_AUC_Box_afterQC_Z -Plate_Analysis_Delta_Backgrd_Box_afterQC_Z +pdf(file.path(out_dir_qc, "plate_analysis_no_zeros_boxplots.pdf"), width = 18, height = 9) +plate_analysis_l_box_after_qc_z +plate_analysis_k_box_after_qc_z +plate_analysis_r_box_after_qc_z +plate_analysis_auc_box_after_qc_z +plate_analysis_delta_bg_box_after_qc_z dev.off() # Remove dataset with zeros removed -rm(X_noZero) +rm(x_no_zero) -# X_test_missing_and_removed <- X[X$Removed == 1, ] +# df_test_missing_and_removed <- df[df$Removed == 1, ] # Calculate summary statistics for all strains, including both background and the deletions -X_stats_ALL <- ddply( - X, - c("Conc_Num", "Conc_Num_Factor"), +x_stats_all <- ddply( + df, + c("conc_num", "conc_num_factor"), summarise, N = (length(l)), - mean_L = mean(l, na.rm = TRUE), - median_L = median(l, na.rm = TRUE), - max_L = max(l, na.rm = TRUE), - min_L = min(l, na.rm = TRUE), - sd_L = sd(l, na.rm = TRUE), - se_L = sd_L / sqrt(N - 1), - mean_K = mean(K, na.rm 
= TRUE), - median_K = median(K, na.rm = TRUE), - max_K = max(K, na.rm = TRUE), - min_K = min(K, na.rm = TRUE), - sd_K = sd(K, na.rm = TRUE), - se_K = sd_K / sqrt(N - 1), + mean_l = mean(l, na.rm = TRUE), + median_l = median(l, na.rm = TRUE), + max_l = max(l, na.rm = TRUE), + min_l = min(l, na.rm = TRUE), + sd_l = sd(l, na.rm = TRUE), + se_l = sd_l / sqrt(N - 1), + mean_k = mean(k, na.rm = TRUE), + median_k = median(k, na.rm = TRUE), + max_k = max(k, na.rm = TRUE), + min_k = min(k, na.rm = TRUE), + sd_k = sd(k, na.rm = TRUE), + se_k = sd_k / sqrt(N - 1), mean_r = mean(r, na.rm = TRUE), median_r = median(r, na.rm = TRUE), max_r = max(r, na.rm = TRUE), min_r = min(r, na.rm = TRUE), sd_r = sd(r, na.rm = TRUE), se_r = sd_r / sqrt(N - 1), - mean_AUC = mean(AUC, na.rm = TRUE), - median_AUC = median(AUC, na.rm = TRUE), - max_AUC = max(AUC, na.rm = TRUE), - min_AUC = min(AUC, na.rm = TRUE), - sd_AUC = sd(AUC, na.rm = TRUE), - se_AUC = sd_AUC / sqrt(N - 1) + mean_auc = mean(auc, na.rm = TRUE), + median_auc = median(auc, na.rm = TRUE), + max_auc = max(auc, na.rm = TRUE), + min_auc = min(auc, na.rm = TRUE), + sd_auc = sd(auc, na.rm = TRUE), + se_auc = sd_auc / sqrt(N - 1) ) -# print(X_stats_ALL_L) -write.csv(X_stats_ALL, file.path(outDir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE) +# print(x_stats_all_l) +write.csv(x_stats_all, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE) # Part 3 - Generate summary statistics and calculate the max theoretical L value # Calculate the Z score at each drug conc for each deletion strain # Get the background strains - can be modified to take another argument but for most screens will just be YDL227C -Background_Strains <- c("YDL227C") +background_strains <- c("YDL227C") # First part of loop will go through for each background strain # In most cases there will only be one YDL227C -for (s in Background_Strains) { - X_Background <- X[X$OrfRep == s, ] +for (s in background_strains) { + x_background <- df[df$OrfRep == 
s, ] # If there's missing data for the background strains set these values to NA so the 0 values aren't included in summary statistics # we may want to consider in some cases giving the max high value to L depending on the data type - if (table(X_Background$l)[1] == 0) { - X_Background[X_Background$l == 0, ]$l <- NA - X_Background[X_Background$K == 0, ]$K <- NA - X_Background[X_Background$r == 0, ]$r <- NA - X_Background[X_Background$AUC == 0, ]$AUC <- NA + if (table(x_background$l)[1] == 0) { + x_background[x_background$l == 0, ]$l <- NA + x_background[x_background$k == 0, ]$k <- NA + x_background[x_background$r == 0, ]$r <- NA + x_background[x_background$auc == 0, ]$auc <- NA } - X_Background <- X_Background[!is.na(X_Background$l), ] + x_background <- x_background[!is.na(x_background$l), ] - # Get summary stats for L, K, R, AUC - X_stats_BY_L <- ddply( - X_Background, - c("OrfRep", "Conc_Num", "Conc_Num_Factor"), + # Get summary stats for L, k, R, auc + x_stats_by_l <- ddply( + x_background, + c("OrfRep", "conc_num", "conc_num_factor"), summarise, N = (length(l)), mean = mean(l, na.rm = TRUE), @@ -810,27 +771,27 @@ for (s in Background_Strains) { se = sd / sqrt(N - 1) ) - print(X_stats_BY_L) - X1_SD <- max(X_stats_BY_L$sd) + print(x_stats_by_l) + x1_sd <- max(x_stats_by_l$sd) - X_stats_BY_K <- ddply( - X_Background, - c("OrfRep", "Conc_Num", "Conc_Num_Factor"), + x_stats_by_k <- ddply( + x_background, + c("OrfRep", "conc_num", "conc_num_factor"), summarise, - N = (length(K)), - mean = mean(K, na.rm = TRUE), - median = median(K, na.rm = TRUE), - max = max(K, na.rm = TRUE), - min = min(K, na.rm = TRUE), - sd = sd(K, na.rm = TRUE), + N = (length(k)), + mean = mean(k, na.rm = TRUE), + median = median(k, na.rm = TRUE), + max = max(k, na.rm = TRUE), + min = min(k, na.rm = TRUE), + sd = sd(k, na.rm = TRUE), se = sd / sqrt(N - 1) ) - X1_SD_K <- max(X_stats_BY_K$sd) + x1_sd_k <- max(x_stats_by_k$sd) - X_stats_BY_r <- ddply( - X_Background, - c("OrfRep", "Conc_Num", 
"Conc_Num_Factor"), + x_stats_by_r <- ddply( + x_background, + c("OrfRep", "conc_num", "conc_num_factor"), summarise, N = length(r), mean = mean(r, na.rm = TRUE), @@ -841,116 +802,116 @@ for (s in Background_Strains) { se = sd / sqrt(N - 1) ) - X1_SD_r <- max(X_stats_BY_r$sd) + x1_sd_r <- max(x_stats_by_r$sd) - X_stats_BY_AUC <- ddply( - X_Background, - c("OrfRep", "Conc_Num", "Conc_Num_Factor"), + x_stats_by_auc <- ddply( + x_background, + c("OrfRep", "conc_num", "conc_num_factor"), summarise, - N = length(AUC), - mean = mean(AUC, na.rm = TRUE), - median = median(AUC, na.rm = TRUE), - max = max(AUC, na.rm = TRUE), - min = min(AUC, na.rm = TRUE), - sd = sd(AUC, na.rm = TRUE), + N = length(auc), + mean = mean(auc, na.rm = TRUE), + median = median(auc, na.rm = TRUE), + max = max(auc, na.rm = TRUE), + min = min(auc, na.rm = TRUE), + sd = sd(auc, na.rm = TRUE), se = sd / sqrt(N - 1) ) - X1_SD_AUC <- max(X_stats_BY_AUC$sd) + x1_sd_auc <- max(x_stats_by_auc$sd) - X_stats_BY <- ddply( - X_Background, - c("OrfRep", "Conc_Num", "Conc_Num_Factor"), + x_stats_by <- ddply( + x_background, + c("OrfRep", "conc_num", "conc_num_factor"), summarise, N = (length(l)), - mean_L = mean(l, na.rm = TRUE), - median_L = median(l, na.rm = TRUE), - max_L = max(l, na.rm = TRUE), - min_L = min(l, na.rm = TRUE), - sd_L = sd(l, na.rm = TRUE), - se_L = sd_L / sqrt(N - 1), - mean_K = mean(K, na.rm = TRUE), - median_K = median(K, na.rm = TRUE), - max_K = max(K, na.rm = TRUE), - min_K = min(K, na.rm = TRUE), - sd_K = sd(K, na.rm = TRUE), - se_K = sd_K / sqrt(N - 1), + mean_l = mean(l, na.rm = TRUE), + median_l = median(l, na.rm = TRUE), + max_l = max(l, na.rm = TRUE), + min_l = min(l, na.rm = TRUE), + sd_l = sd(l, na.rm = TRUE), + se_l = sd_l / sqrt(N - 1), + mean_k = mean(k, na.rm = TRUE), + median_k = median(k, na.rm = TRUE), + max_k = max(k, na.rm = TRUE), + min_k = min(k, na.rm = TRUE), + sd_k = sd(k, na.rm = TRUE), + se_k = sd_k / sqrt(N - 1), mean_r = mean(r, na.rm = TRUE), median_r = 
median(r, na.rm = TRUE), max_r = max(r, na.rm = TRUE), min_r = min(r, na.rm = TRUE), sd_r = sd(r, na.rm = TRUE), se_r = sd_r / sqrt(N - 1), - mean_AUC = mean(AUC, na.rm = TRUE), - median_AUC = median(AUC, na.rm = TRUE), - max_AUC = max(AUC, na.rm = TRUE), - min_L = min(AUC, na.rm = TRUE), - sd_AUC = sd(AUC, na.rm = TRUE), - se_AUC = sd_AUC / sqrt(N - 1) + mean_auc = mean(auc, na.rm = TRUE), + median_auc = median(auc, na.rm = TRUE), + max_auc = max(auc, na.rm = TRUE), + min_l = min(auc, na.rm = TRUE), + sd_auc = sd(auc, na.rm = TRUE), + se_auc = sd_auc / sqrt(N - 1) ) - write.csv(X_stats_BY, file.path(outDir, "SummaryStats_BackgroundStrains.csv"), row.names = FALSE) + write.csv(x_stats_by, file.path(out_dir, "summary_stats_background_strains.csv"), row.names = FALSE) # Calculate the max theoretical L values - # Only look for max values when K is within 2SD of the ref strain - for (q in unique(X$Conc_Num_Factor)) { + # Only look for max values when k is within 2sd of the ref strain + for (q in unique(df$conc_num_factor)) { if (q == 0) { - X_within_2SD_K <- - X[X$Conc_Num_Factor == q, ] - X_within_2SD_K <- - X_within_2SD_K[!is.na(X_within_2SD_K$l), ] - X_stats_TEMP_K <- - X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q, ] - X_within_2SD_K <- - X_within_2SD_K[X_within_2SD_K$K >= (X_stats_TEMP_K$mean[1] - (2 * X_stats_TEMP_K$sd[1])), ] - X_within_2SD_K <- - X_within_2SD_K[X_within_2SD_K$K <= (X_stats_TEMP_K$mean[1] + (2 * X_stats_TEMP_K$sd[1])), ] - X_outside_2SD_K <- - X[X$Conc_Num_Factor == q, ] - X_outside_2SD_K <- - X_outside_2SD_K[!is.na(X_outside_2SD_K$l), ] - # X_outside_2SD_K_Temp <- - # X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q, ] - X_outside_2SD_K <- - X_outside_2SD_K[ - X_outside_2SD_K$K <= (X_stats_TEMP_K$mean[1] - (2 * X_stats_TEMP_K$sd[1])) | - X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2 * X_stats_TEMP_K$sd[1])), ] - # X_outside_2SD_K <- - # X_outside_2SD_K[X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])), ] + 
x_within_2sd_k <- + df[df$conc_num_factor == q, ] + x_within_2sd_k <- + x_within_2sd_k[!is.na(x_within_2sd_k$l), ] + x_stats_temp_k <- + x_stats_by_k[x_stats_by_k$conc_num_factor == q, ] + x_within_2sd_k <- + x_within_2sd_k[x_within_2sd_k$k >= (x_stats_temp_k$mean[1] - (2 * x_stats_temp_k$sd[1])), ] + x_within_2sd_k <- + x_within_2sd_k[x_within_2sd_k$k <= (x_stats_temp_k$mean[1] + (2 * x_stats_temp_k$sd[1])), ] + x_outside_2sd_k <- + df[df$conc_num_factor == q, ] + x_outside_2sd_k <- + x_outside_2sd_k[!is.na(x_outside_2sd_k$l), ] + # x_outside_2sd_k_temp <- + # x_stats_by_k[x_stats_by_k$conc_num_factor == q, ] + x_outside_2sd_k <- + x_outside_2sd_k[ + x_outside_2sd_k$k <= (x_stats_temp_k$mean[1] - (2 * x_stats_temp_k$sd[1])) | + x_outside_2sd_k$k >= (x_stats_temp_k$mean[1] + (2 * x_stats_temp_k$sd[1])), ] + # x_outside_2sd_k <- + # x_outside_2sd_k[x_outside_2sd_k$k >= (x_stats_temp_k$mean[1] + (2*x_stats_temp_k$sd[1])), ] } if (q > 0) { - X_within_2SD_K_temp <- - X[X$Conc_Num_Factor == q, ] - X_within_2SD_K_temp <- - X_within_2SD_K_temp[!is.na(X_within_2SD_K_temp$l), ] - X_stats_TEMP_K <- - X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q, ] - X_within_2SD_K_temp <- - X_within_2SD_K_temp[X_within_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] - (2 * X_stats_TEMP_K$sd[1])), ] - X_within_2SD_K_temp <- - X_within_2SD_K_temp[X_within_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] + (2 * X_stats_TEMP_K$sd[1])), ] - X_within_2SD_K <- - rbind(X_within_2SD_K, X_within_2SD_K_temp) - X_outside_2SD_K_temp <- - X[X$Conc_Num_Factor == q, ] - X_outside_2SD_K_temp <- - X_outside_2SD_K_temp[!is.na(X_outside_2SD_K_temp$l), ] - # X_outside_2SD_K_Temp <- - # X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q, ] - X_outside_2SD_K_temp <- - X_outside_2SD_K_temp[ - X_outside_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] - (2 * X_stats_TEMP_K$sd[1])) | - X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2 * X_stats_TEMP_K$sd[1])), ] - # X_outside_2SD_K_temp <- - # X_outside_2SD_K_temp[X_outside_2SD_K_temp$K >= 
(X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) , ] - X_outside_2SD_K <- - rbind(X_outside_2SD_K, X_outside_2SD_K_temp) + x_within_2sd_k_temp <- + df[df$conc_num_factor == q, ] + x_within_2sd_k_temp <- + x_within_2sd_k_temp[!is.na(x_within_2sd_k_temp$l), ] + x_stats_temp_k <- + x_stats_by_k[x_stats_by_k$conc_num_factor == q, ] + x_within_2sd_k_temp <- + x_within_2sd_k_temp[x_within_2sd_k_temp$k >= (x_stats_temp_k$mean[1] - (2 * x_stats_temp_k$sd[1])), ] + x_within_2sd_k_temp <- + x_within_2sd_k_temp[x_within_2sd_k_temp$k <= (x_stats_temp_k$mean[1] + (2 * x_stats_temp_k$sd[1])), ] + x_within_2sd_k <- + rbind(x_within_2sd_k, x_within_2sd_k_temp) + x_outside_2sd_k_temp <- + df[df$conc_num_factor == q, ] + x_outside_2sd_k_temp <- + x_outside_2sd_k_temp[!is.na(x_outside_2sd_k_temp$l), ] + # x_outside_2sd_k_temp <- + # x_stats_by_k[x_stats_by_k$conc_num_factor == q, ] + x_outside_2sd_k_temp <- + x_outside_2sd_k_temp[ + x_outside_2sd_k_temp$k <= (x_stats_temp_k$mean[1] - (2 * x_stats_temp_k$sd[1])) | + x_outside_2sd_k_temp$k >= (x_stats_temp_k$mean[1] + (2 * x_stats_temp_k$sd[1])), ] + # x_outside_2sd_k_temp <- + # x_outside_2sd_k_temp[x_outside_2sd_k_temp$k >= (x_stats_temp_k$mean[1] + (2*x_stats_temp_k$sd[1])) , ] + x_outside_2sd_k <- + rbind(x_outside_2sd_k, x_outside_2sd_k_temp) } } - X_stats_BY_L_within_2SD_K <- ddply( - X_within_2SD_K, - c("Conc_Num", "Conc_Num_Factor"), + x_stats_by_l_within_2sd_k <- ddply( + x_within_2sd_k, + c("conc_num", "conc_num_factor"), summarise, N = (length(l)), mean = mean(l), @@ -962,19 +923,19 @@ for (s in Background_Strains) { z_max = (max - mean) / sd ) - print(X_stats_BY_L_within_2SD_K) + print(x_stats_by_l_within_2sd_k) - X1_SD_within_2SD_K <- max(X_stats_BY_L_within_2SD_K$sd) + x1_sd_within_2sd_k <- max(x_stats_by_l_within_2sd_k$sd) write.csv( - X_stats_BY_L_within_2SD_K, - file.path(outDir_QC, "Max_Observed_L_Vals_for_spots_within_2SD_K.csv"), + x_stats_by_l_within_2sd_k, + file.path(out_dir_qc, 
"max_observed_l_vals_for_spots_within_2sd_k.csv"), row.names = FALSE ) - X_stats_BY_L_outside_2SD_K <- ddply( - X_outside_2SD_K, - c("Conc_Num", "Conc_Num_Factor"), + x_stats_by_l_outside_2sd_k <- ddply( + x_outside_2sd_k, + c("conc_num", "conc_num_factor"), summarise, N = (length(l)), mean = mean(l), @@ -985,181 +946,181 @@ for (s in Background_Strains) { se = sd / sqrt(N - 1) ) - print(X_stats_BY_L_outside_2SD_K) - X1_SD_outside_2SD_K <- max(X_stats_BY_L_outside_2SD_K$sd) + print(x_stats_by_l_outside_2sd_k) + x1_sd_outside_2sd_k <- max(x_stats_by_l_outside_2sd_k$sd) - # X1_SD_outside_2SD_K <- X[X$l %in% X1_SD_within_2SD_K$l, ] - Outside_2SD_K_L_vs_K <- - ggplot(X_outside_2SD_K, aes(l, K, color = as.factor(Conc_Num))) + - geom_point(aes(ORF = ORF, Gene = Gene, Delta_Backgrd = Delta_Backgrd), shape = 3) + - ggtitle("Raw L vs K for strains falling outside 2SD of the K mean at each conc") + + # x1_sd_outside_2sd_k <- df[df$l %in% x1_sd_within_2sd_k$l, ] + outside_2sd_k_l_vs_k <- + ggplot(x_outside_2sd_k, aes(l, k, color = as.factor(conc_num))) + + geom_point(aes(ORF = ORF, Gene = Gene, delta_bg = delta_bg), shape = 3) + + ggtitle("Raw L vs K for strains falling outside 2sd of the K mean at each conc") + theme_publication_legend_right() - pdf(file.path(outDir_QC, "Raw_L_vs_K_for_strains_2SD_outside_mean_K.pdf"), width = 10, height = 8) - print(Outside_2SD_K_L_vs_K) + pdf(file.path(out_dir_qc, "raw_l_vs_k_for_strains_2sd_outside_mean_k.pdf"), width = 10, height = 8) + print(outside_2sd_k_l_vs_k) dev.off() - pgg <- ggplotly(Outside_2SD_K_L_vs_K) - plotly_path <- file.path(outDir_QC, "RawL_vs_K_for_strains_outside_2SD_K.html") + pgg <- ggplotly(outside_2sd_k_l_vs_k) + plotly_path <- file.path(out_dir_qc, "raw_l_vs_k_for_strains_outside_2sd_k.html") saveWidget(pgg, file = plotly_path, selfcontained = TRUE) - Outside_2SD_K_delta_background_vs_K <- - ggplot(X_outside_2SD_K, aes(Delta_Backgrd, K, color = as.factor(Conc_Num))) + + outside_2sd_k_delta_background_vs_k <- + 
ggplot(x_outside_2sd_k, aes(delta_bg, k, color = as.factor(conc_num))) + geom_point(aes(l = l, ORF = ORF, Gene = Gene), shape = 3, position = "jitter") + - ggtitle("DeltaBackground vs K for strains falling outside 2SD of the K mean at each conc") + + ggtitle("DeltaBackground vs K for strains falling outside 2sd of the K mean at each conc") + theme_publication_legend_right() - pdf(file.path(outDir_QC, "DeltaBackground_vs_K_for_strains_2SD_outside_mean_K.pdf"), width = 10, height = 8) - Outside_2SD_K_delta_background_vs_K + pdf(file.path(out_dir_qc, "delta_background_vs_k_for_strains_2sd_outside_mean_k.pdf"), width = 10, height = 8) + outside_2sd_k_delta_background_vs_k dev.off() - pgg <- ggplotly(Outside_2SD_K_delta_background_vs_K) + pgg <- ggplotly(outside_2sd_k_delta_background_vs_k) # pgg - plotly_path <- file.path(outDir_QC, "DeltaBackground_vs_K_for_strains_outside_2SD_K.html") + plotly_path <- file.path(out_dir_qc, "delta_background_vs_k_for_strains_outside_2sd_k.html") saveWidget(pgg, file = plotly_path, selfcontained = TRUE) # Get the background strain mean values at the no drug conc to calculate shift - Background_L <- X_stats_BY_L$mean[1] - Background_K <- X_stats_BY_K$mean[1] - Background_r <- X_stats_BY_r$mean[1] - Background_AUC <- X_stats_BY_AUC$mean[1] + background_l <- x_stats_by_l$mean[1] + background_k <- x_stats_by_k$mean[1] + background_r <- x_stats_by_r$mean[1] + background_auc <- x_stats_by_auc$mean[1] # Create empty plots for plotting element p_l <- ggplot() - p_K <- ggplot() + p_k <- ggplot() p_r <- ggplot() - p_AUC <- ggplot() + p_auc <- ggplot() p_rf_l <- ggplot() - p_rf_K <- ggplot() + p_rf_k <- ggplot() p_rf_r <- ggplot() - p_rf_AUC <- ggplot() + p_rf_auc <- ggplot() # Get only the deletion strains - X2 <- X - X2 <- X2[X2$OrfRep != "YDL227C", ] + df2 <- df + df2 <- df2[df2$OrfRep != "YDL227C", ] # If set to max theoretical value, add a 1 to SM, if not, leave as 0 # SM = Set to Max - X2$SM <- 0 + df2$SM <- 0 # Set the missing values to 
the highest theoretical value at each drug conc for L # Leave other values as 0 for the max/min - for (i in 1:length(unique(X2$Conc_Num))) { - Concentration <- unique(X2$Conc_Num)[i] - X2_temp <- X2[X2$Conc_Num == Concentration, ] - if (Concentration == 0) { - X2_new <- X2_temp - sprintf("Check loop order, conc = %f", Concentration) + for (i in 1:length(unique(df2$conc_num))) { + concentration <- unique(df2$conc_num)[i] + df2_temp <- df2[df2$conc_num == concentration, ] + if (concentration == 0) { + df2_new <- df2_temp + sprintf("Check loop order, conc = %f", concentration) } - if (Concentration > 0) { - try(X2_temp[X2_temp$l == 0 & !is.na(X2_temp$l), ]$l <- X_stats_BY_L_within_2SD_K$max[i]) - try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l), ]$SM <- 1) - try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l), ]$l <- X_stats_BY_L_within_2SD_K$max[i]) - # X2_temp[X2_temp$K == 0, ]$K <- X_stats_ALL_K$max[i] - # X2_temp[X2_temp$r == 0, ]$r <- X_stats_ALL_r$max[i] - # X2_temp[X2_temp$AUC == 0, ]$AUC <- X_stats_ALL_AUC$max[i] - sprintf("Check loop order, conc = %f", Concentration) - X2_new <- rbind(X2_new, X2_temp) + if (concentration > 0) { + try(df2_temp[df2_temp$l == 0 & !is.na(df2_temp$l), ]$l <- x_stats_by_l_within_2sd_k$max[i]) + try(df2_temp[df2_temp$l >= x_stats_by_l_within_2sd_k$max[i] & !is.na(df2_temp$l), ]$SM <- 1) + try(df2_temp[df2_temp$l >= x_stats_by_l_within_2sd_k$max[i] & !is.na(df2_temp$l), ]$l <- x_stats_by_l_within_2sd_k$max[i]) + # df2_temp[df2_temp$k == 0, ]$k <- x_stats_all_k$max[i] + # df2_temp[df2_temp$r == 0, ]$r <- x_stats_all_r$max[i] + # df2_temp[df2_temp$auc == 0, ]$auc <- x_stats_all_auc$max[i] + sprintf("Check loop order, conc = %f", concentration) + df2_new <- rbind(df2_new, df2_temp) } } - X2 <- X2_new + df2 <- df2_new # Get only the RF strains - X2_RF <- X - X2_RF <- X2_RF[X2_RF$OrfRep == "YDL227C", ] + df2_rf <- df + df2_rf <- df2_rf[df2_rf$OrfRep == "YDL227C", ] # If set to max 
theoretical value, add a 1 to SM, if not, leave as 0 # SM = Set to Max - X2_RF$SM <- 0 + df2_rf$SM <- 0 # Set the missing values to the highest theoretical value at each drug conc for L # Leave other values as 0 for the max/min - for (i in 1:length(unique(X2_RF$Conc_Num))) { - Concentration <- unique(X2_RF$Conc_Num)[i] - X2_RF_temp <- X2_RF[X2_RF$Conc_Num == Concentration, ] - if (Concentration == 0) { - X2_RF_new <- X2_RF_temp - sprintf("Check loop order, conc = %f", Concentration) + for (i in 1:length(unique(df2_rf$conc_num))) { + concentration <- unique(df2_rf$conc_num)[i] + df2_rf_temp <- df2_rf[df2_rf$conc_num == concentration, ] + if (concentration == 0) { + df2_rf_new <- df2_rf_temp + sprintf("Check loop order, conc = %f", concentration) } - if (Concentration > 0) { - try(X2_RF_temp[X2_RF_temp$l == 0 & !is.na(X2_RF_temp$l), ]$l <- X_stats_BY_L_within_2SD_K$max[i]) - try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l), ]$SM <- 1) - try(X2_RF_temp[X2_RF_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_RF_temp$l), ]$l <- X_stats_BY_L_within_2SD_K$max[i]) - sprintf("If error, refs have no L values outside theoretical max L, for REFs, conc = %f", Concentration) - X2_RF_new <- rbind(X2_RF_new, X2_RF_temp) + if (concentration > 0) { + try(df2_rf_temp[df2_rf_temp$l == 0 & !is.na(df2_rf_temp$l), ]$l <- x_stats_by_l_within_2sd_k$max[i]) + try(df2_temp[df2_temp$l >= x_stats_by_l_within_2sd_k$max[i] & !is.na(df2_temp$l), ]$SM <- 1) + try(df2_rf_temp[df2_rf_temp$l >= x_stats_by_l_within_2sd_k$max[i] & !is.na(df2_rf_temp$l), ]$l <- x_stats_by_l_within_2sd_k$max[i]) + sprintf("If error, refs have no L values outside theoretical max L, for REFs, conc = %f", concentration) + df2_rf_new <- rbind(df2_rf_new, df2_rf_temp) } } - X2_RF <- X2_RF_new + df2_rf <- df2_rf_new # Part 4 Get the RF Z score values # Change the OrfRep Column to include the RF strain, the Gene name and the Num. 
so each RF gets its own score - X2_RF$OrfRep <- paste(X2_RF$OrfRep, X2_RF$Gene, X2_RF$Num., sep = "_") + df2_rf$OrfRep <- paste(df2_rf$OrfRep, df2_rf$Gene, df2_rf$Num., sep = "_") - num_genes_RF <- length(unique(X2_RF$OrfRep)) - # print(num_genes_RF) + num_genes_rf <- length(unique(df2_rf$OrfRep)) + # print(num_genes_rf) # Create the output data.frame containing columns for each RF strain - InteractionScores_RF <- unique(X2_RF["OrfRep"]) - # InteractionScores_RF$Gene <- unique(X2$Gene) - InteractionScores_RF$Gene <- NA - InteractionScores_RF$Raw_Shift_L <- NA - InteractionScores_RF$Z_Shift_L <- NA - InteractionScores_RF$lm_Score_L <- NA - InteractionScores_RF$Z_lm_L <- NA - InteractionScores_RF$R_Squared_L <- NA - InteractionScores_RF$Sum_Z_Score_L <- NA - InteractionScores_RF$Avg_Zscore_L <- NA - InteractionScores_RF$Raw_Shift_K <- NA - InteractionScores_RF$Z_Shift_K <- NA - InteractionScores_RF$lm_Score_K <- NA - InteractionScores_RF$Z_lm_K <- NA - InteractionScores_RF$R_Squared_K <- NA - InteractionScores_RF$Sum_Z_Score_K <- NA - InteractionScores_RF$Avg_Zscore_K <- NA - InteractionScores_RF$Raw_Shift_r <- NA - InteractionScores_RF$Z_Shift_r <- NA - InteractionScores_RF$lm_Score_r <- NA - InteractionScores_RF$Z_lm_r <- NA - InteractionScores_RF$R_Squared_r <- NA - InteractionScores_RF$Sum_Z_Score_r <- NA - InteractionScores_RF$Avg_Zscore_r <- NA - InteractionScores_RF$Raw_Shift_AUC <- NA - InteractionScores_RF$Z_Shift_AUC <- NA - InteractionScores_RF$lm_Score_AUC <- NA - InteractionScores_RF$Z_lm_AUC <- NA - InteractionScores_RF$R_Squared_AUC <- NA - InteractionScores_RF$Sum_Z_Score_AUC <- NA - InteractionScores_RF$Avg_Zscore_AUC <- NA - InteractionScores_RF$NG <- NA - InteractionScores_RF$SM <- NA + interaction_scores_rf <- unique(df2_rf["OrfRep"]) + # interaction_scores_rf$Gene <- unique(df2$Gene) + interaction_scores_rf$Gene <- NA + interaction_scores_rf$Raw_Shift_l <- NA + interaction_scores_rf$z_shift_l <- NA + interaction_scores_rf$lm_Score_l <- NA + 
interaction_scores_rf$z_lm_l <- NA + interaction_scores_rf$R_Squared_l <- NA + interaction_scores_rf$Sum_z_Score_l <- NA + interaction_scores_rf$avg_zscore_l <- NA + interaction_scores_rf$Raw_Shift_k <- NA + interaction_scores_rf$z_shift_k <- NA + interaction_scores_rf$lm_Score_k <- NA + interaction_scores_rf$z_lm_k <- NA + interaction_scores_rf$R_Squared_k <- NA + interaction_scores_rf$Sum_z_Score_k <- NA + interaction_scores_rf$avg_zscore_k <- NA + interaction_scores_rf$Raw_Shift_r <- NA + interaction_scores_rf$z_shift_r <- NA + interaction_scores_rf$lm_Score_r <- NA + interaction_scores_rf$z_lm_r <- NA + interaction_scores_rf$R_Squared_r <- NA + interaction_scores_rf$Sum_z_Score_r <- NA + interaction_scores_rf$avg_zscore_r <- NA + interaction_scores_rf$Raw_Shift_auc <- NA + interaction_scores_rf$z_shift_auc <- NA + interaction_scores_rf$lm_Score_auc <- NA + interaction_scores_rf$z_lm_auc <- NA + interaction_scores_rf$R_Squared_auc <- NA + interaction_scores_rf$Sum_z_Score_auc <- NA + interaction_scores_rf$avg_zscore_auc <- NA + interaction_scores_rf$NG <- NA + interaction_scores_rf$SM <- NA - for (i in 1:num_genes_RF) { + for (i in 1:num_genes_rf) { # Get each deletion strain ORF - Gene_Sel <- unique(X2_RF$OrfRep)[i] + gene_sel <- unique(df2_rf$OrfRep)[i] # Extract only the current deletion strain and its data - X_Gene_Sel <- X2_RF[X2_RF$OrfRep == Gene_Sel, ] + x_gene_sel <- df2_rf[df2_rf$OrfRep == gene_sel, ] - X_stats_interaction <- ddply( - X_Gene_Sel, - c("OrfRep", "Gene", "Conc_Num", "Conc_Num_Factor"), + x_stats_interaction <- ddply( + x_gene_sel, + c("OrfRep", "Gene", "conc_num", "conc_num_factor"), summarise, N = (length(l)), - mean_L = mean(l, na.rm = TRUE), - median_L = median(l, na.rm = TRUE), - sd_L = sd(l, na.rm = TRUE), - se_L = sd_L / sqrt(N - 1), - mean_K = mean(K, na.rm = TRUE), - median_K = median(K, na.rm = TRUE), - sd_K = sd(K, na.rm = TRUE), - se_K = sd_K / sqrt(N - 1), + mean_l = mean(l, na.rm = TRUE), + median_l = median(l, na.rm = TRUE), 
+ sd_l = sd(l, na.rm = TRUE), + se_l = sd_l / sqrt(N - 1), + mean_k = mean(k, na.rm = TRUE), + median_k = median(k, na.rm = TRUE), + sd_k = sd(k, na.rm = TRUE), + se_k = sd_k / sqrt(N - 1), mean_r = mean(r, na.rm = TRUE), median_r = median(r, na.rm = TRUE), sd_r = sd(r, na.rm = TRUE), se_r = sd_r / sqrt(N - 1), - mean_AUC = mean(AUC, na.rm = TRUE), - median_AUC = median(AUC, na.rm = TRUE), - sd_AUC = sd(AUC, na.rm = TRUE), - se_AUC = sd_AUC / sqrt(N - 1), + mean_auc = mean(auc, na.rm = TRUE), + median_auc = median(auc, na.rm = TRUE), + sd_auc = sd(auc, na.rm = TRUE), + se_auc = sd_auc / sqrt(N - 1), NG = sum(NG, na.rm = TRUE), DB = sum(DB, na.rm = TRUE), SM = sum(SM, na.rm = TRUE) @@ -1170,459 +1131,459 @@ for (s in Background_Strains) { # if L is NA at 0, that means the spot was removed due to contamination # if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift # otherwise calculate shift at no drug conc - if (is.na(X_stats_interaction$mean_L[1]) || X_stats_interaction$mean_L[1] == 0) { - X_stats_interaction$Raw_Shift_L <- 0 - X_stats_interaction$Raw_Shift_K <- 0 - X_stats_interaction$Raw_Shift_r <- 0 - X_stats_interaction$Raw_Shift_AUC <- 0 - X_stats_interaction$Z_Shift_L <- 0 - X_stats_interaction$Z_Shift_K <- 0 - X_stats_interaction$Z_Shift_r <- 0 - X_stats_interaction$Z_Shift_AUC <- 0 + if (is.na(x_stats_interaction$mean_l[1]) || x_stats_interaction$mean_l[1] == 0) { + x_stats_interaction$Raw_Shift_l <- 0 + x_stats_interaction$Raw_Shift_k <- 0 + x_stats_interaction$Raw_Shift_r <- 0 + x_stats_interaction$Raw_Shift_auc <- 0 + x_stats_interaction$z_shift_l <- 0 + x_stats_interaction$z_shift_k <- 0 + x_stats_interaction$z_shift_r <- 0 + x_stats_interaction$z_shift_auc <- 0 } else { - X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L - X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K - X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r - 
X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC - X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1] / X_stats_BY_L$sd[1] - X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1] / X_stats_BY_K$sd[1] - X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1] / X_stats_BY_r$sd[1] - X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1] / X_stats_BY_AUC$sd[1] + x_stats_interaction$Raw_Shift_l <- x_stats_interaction$mean_l[1] - background_l + x_stats_interaction$Raw_Shift_k <- x_stats_interaction$mean_k[1] - background_k + x_stats_interaction$Raw_Shift_r <- x_stats_interaction$mean_r[1] - background_r + x_stats_interaction$Raw_Shift_auc <- x_stats_interaction$mean_auc[1] - background_auc + x_stats_interaction$z_shift_l <- x_stats_interaction$Raw_Shift_l[1] / x_stats_by_l$sd[1] + x_stats_interaction$z_shift_k <- x_stats_interaction$Raw_Shift_k[1] / x_stats_by_k$sd[1] + x_stats_interaction$z_shift_r <- x_stats_interaction$Raw_Shift_r[1] / x_stats_by_r$sd[1] + x_stats_interaction$z_shift_auc <- x_stats_interaction$Raw_Shift_auc[1] / x_stats_by_auc$sd[1] } # Get WT vals - X_stats_interaction$WT_l <- X_stats_BY_L$mean - X_stats_interaction$WT_K <- X_stats_BY_K$mean - X_stats_interaction$WT_r <- X_stats_BY_r$mean - X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean + x_stats_interaction$WT_l <- x_stats_by_l$mean + x_stats_interaction$WT_k <- x_stats_by_k$mean + x_stats_interaction$WT_r <- x_stats_by_r$mean + x_stats_interaction$WT_auc <- x_stats_by_auc$mean # Get WT SD - X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd - X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd - X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd - X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd + x_stats_interaction$WT_sd_l <- x_stats_by_l$sd + x_stats_interaction$WT_sd_k <- x_stats_by_k$sd + x_stats_interaction$WT_sd_r <- x_stats_by_r$sd + x_stats_interaction$WT_sd_auc <- x_stats_by_auc$sd # Only get scores 
if there's growth at no drug - if (X_stats_interaction$mean_L[1] != 0 && !is.na(X_stats_interaction$mean_L[1])) { + if (x_stats_interaction$mean_l[1] != 0 && !is.na(x_stats_interaction$mean_l[1])) { # Calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC + x_stats_interaction$Exp_l <- x_stats_interaction$WT_l + x_stats_interaction$Raw_Shift_l + x_stats_interaction$Exp_k <- x_stats_interaction$WT_k + x_stats_interaction$Raw_Shift_k + x_stats_interaction$Exp_r <- x_stats_interaction$WT_r + x_stats_interaction$Raw_Shift_r + x_stats_interaction$Exp_auc <- x_stats_interaction$WT_auc + x_stats_interaction$Raw_Shift_auc # Calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <- X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC + x_stats_interaction$delta_l <- x_stats_interaction$mean_l - x_stats_interaction$Exp_l + x_stats_interaction$delta_k <- x_stats_interaction$mean_k - x_stats_interaction$Exp_k + x_stats_interaction$delta_r <- x_stats_interaction$mean_r - x_stats_interaction$Exp_r + x_stats_interaction$delta_auc <- x_stats_interaction$mean_auc - x_stats_interaction$Exp_auc # Disregard shift for no growth values in Z score calculation - if (sum(X_stats_interaction$NG, na.rm = TRUE) > 0) { - X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_L <- - X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_L - X_stats_interaction[X_stats_interaction$NG == 
1, ]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_K <- - X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_r <- - X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_AUC <- - X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_AUC + if (sum(x_stats_interaction$NG, na.rm = TRUE) > 0) { + x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_l <- + x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_l + x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_k <- + x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_k + x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_r <- + x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_r + x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_auc <- + x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_auc } # Disregard shift for set to max values in Z score calculation - if (sum(X_stats_interaction$SM, na.rm = TRUE) > 0) { - X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_L <- - X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_l + if (sum(x_stats_interaction$SM, na.rm = TRUE) > 0) { + x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_l <- + x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_l # Only calculate the L value without shift since L is the only adjusted value - # 
X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_K <- - # X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_K - # X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_r <- - # X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_r - # X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_AUC <- - # X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_AU + # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_k <- + # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_k + # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_r <- + # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_r + # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_auc <- + # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_AU } # Calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L) / (X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K) / (X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r) / (X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC) / (X_stats_interaction$WT_sd_AUC) + x_stats_interaction$zscore_l <- (x_stats_interaction$delta_l) / (x_stats_interaction$WT_sd_l) + x_stats_interaction$zscore_k <- (x_stats_interaction$delta_k) / (x_stats_interaction$WT_sd_k) + x_stats_interaction$zscore_r <- (x_stats_interaction$delta_r) / (x_stats_interaction$WT_sd_r) + x_stats_interaction$zscore_auc <- (x_stats_interaction$delta_auc) / (x_stats_interaction$WT_sd_auc) # Get linear model - gene_lm_L <- lm(formula = 
Delta_L ~ Conc_Num_Factor, data = X_stats_interaction) - gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor, data = X_stats_interaction) - gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor, data = X_stats_interaction) - gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor, data = X_stats_interaction) + gene_lm_l <- lm(formula = delta_l ~ conc_num_factor, data = x_stats_interaction) + gene_lm_k <- lm(formula = delta_k ~ conc_num_factor, data = x_stats_interaction) + gene_lm_r <- lm(formula = delta_r ~ conc_num_factor, data = x_stats_interaction) + gene_lm_auc <- lm(formula = delta_auc ~ conc_num_factor, data = x_stats_interaction) # Get interaction score calculated by linear model and R-squared value for the fit - gene_interaction_L <- MAX_CONC * (gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] - r_squared_l <- summary(gene_lm_L)$r.squared - gene_interaction_K <- MAX_CONC * (gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] - r_squared_K <- summary(gene_lm_K)$r.squared - gene_interaction_r <- MAX_CONC * (gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] + gene_interaction_l <- max_conc * (gene_lm_l$coefficients[2]) + gene_lm_l$coefficients[1] + r_squared_l <- summary(gene_lm_l)$r.squared + gene_interaction_k <- max_conc * (gene_lm_k$coefficients[2]) + gene_lm_k$coefficients[1] + r_squared_k <- summary(gene_lm_k)$r.squared + gene_interaction_r <- max_conc * (gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] r_squared_r <- summary(gene_lm_r)$r.squared - gene_interaction_AUC <- MAX_CONC * (gene_lm_r$coefficients[2]) + gene_lm_AUC$coefficients[1] - r_squared_AUC <- summary(gene_lm_AUC)$r.squared + gene_interaction_auc <- max_conc * (gene_lm_r$coefficients[2]) + gene_lm_auc$coefficients[1] + r_squared_auc <- summary(gene_lm_auc)$r.squared # Get total of non removed values - Num_non_Removed_Conc <- total_conc_nums - sum(X_stats_interaction$DB, na.rm = TRUE) - 1 + num_non_removed_conc <- total_conc_nums - sum(x_stats_interaction$DB, na.rm = 
TRUE) - 1 # Report the scores - InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- - X_Gene_Sel$OrfRep[1] - InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- - X_Gene_Sel$Gene[1] - InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- - X_stats_interaction$Raw_Shift_L[1] - InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- - X_stats_interaction$Z_Shift_L[1] - InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- - gene_interaction_L - InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- + interaction_scores_rf$OrfRep[interaction_scores_rf$OrfRep == gene_sel] <- + x_gene_sel$OrfRep[1] + interaction_scores_rf$Gene[interaction_scores_rf$OrfRep == gene_sel] <- + x_gene_sel$Gene[1] + interaction_scores_rf$Raw_Shift_l[interaction_scores_rf$OrfRep == gene_sel] <- + x_stats_interaction$Raw_Shift_l[1] + interaction_scores_rf$z_shift_l[interaction_scores_rf$OrfRep == gene_sel] <- + x_stats_interaction$z_shift_l[1] + interaction_scores_rf$lm_Score_l[interaction_scores_rf$OrfRep == gene_sel] <- + gene_interaction_l + interaction_scores_rf$R_Squared_l[interaction_scores_rf$OrfRep == gene_sel] <- r_squared_l - InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- - sum(X_stats_interaction$Zscore_L, na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- - sum(X_stats_interaction$Zscore_L, na.rm = TRUE) / (Num_non_Removed_Conc) - InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- - X_stats_interaction$Raw_Shift_K[1] - InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- - X_stats_interaction$Z_Shift_K[1] - InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- - gene_interaction_K - InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- - r_squared_K - 
InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- - sum(X_stats_interaction$Zscore_K, na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- - sum(X_stats_interaction$Zscore_K, na.rm = TRUE) / (Num_non_Removed_Conc) - InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- - X_stats_interaction$Raw_Shift_r[1] - InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- - X_stats_interaction$Z_Shift_r[1] - InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- + interaction_scores_rf$Sum_z_Score_l[interaction_scores_rf$OrfRep == gene_sel] <- + sum(x_stats_interaction$zscore_l, na.rm = TRUE) + interaction_scores_rf$avg_zscore_l[interaction_scores_rf$OrfRep == gene_sel] <- + sum(x_stats_interaction$zscore_l, na.rm = TRUE) / (num_non_removed_conc) + interaction_scores_rf$Raw_Shift_k[interaction_scores_rf$OrfRep == gene_sel] <- + x_stats_interaction$Raw_Shift_k[1] + interaction_scores_rf$z_shift_k[interaction_scores_rf$OrfRep == gene_sel] <- + x_stats_interaction$z_shift_k[1] + interaction_scores_rf$lm_Score_k[interaction_scores_rf$OrfRep == gene_sel] <- + gene_interaction_k + interaction_scores_rf$R_Squared_k[interaction_scores_rf$OrfRep == gene_sel] <- + r_squared_k + interaction_scores_rf$Sum_z_Score_k[interaction_scores_rf$OrfRep == gene_sel] <- + sum(x_stats_interaction$zscore_k, na.rm = TRUE) + interaction_scores_rf$avg_zscore_k[interaction_scores_rf$OrfRep == gene_sel] <- + sum(x_stats_interaction$zscore_k, na.rm = TRUE) / (num_non_removed_conc) + interaction_scores_rf$Raw_Shift_r[interaction_scores_rf$OrfRep == gene_sel] <- + x_stats_interaction$Raw_Shift_r[1] + interaction_scores_rf$z_shift_r[interaction_scores_rf$OrfRep == gene_sel] <- + x_stats_interaction$z_shift_r[1] + interaction_scores_rf$lm_Score_r[interaction_scores_rf$OrfRep == gene_sel] <- gene_interaction_r - InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == 
Gene_Sel] <- + interaction_scores_rf$R_Squared_r[interaction_scores_rf$OrfRep == gene_sel] <- r_squared_r - InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- - sum(X_stats_interaction$Zscore_r, na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- - sum(X_stats_interaction$Zscore_r, na.rm = TRUE) / (total_conc_nums - 1) - InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- - X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- - X_stats_interaction$Z_Shift_AUC[1] - InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- - gene_interaction_AUC - InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- - r_squared_AUC - InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- - sum(X_stats_interaction$Zscore_AUC, na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- - sum(X_stats_interaction$Zscore_AUC, na.rm = TRUE) / (total_conc_nums - 1) + interaction_scores_rf$Sum_z_Score_r[interaction_scores_rf$OrfRep == gene_sel] <- + sum(x_stats_interaction$zscore_r, na.rm = TRUE) + interaction_scores_rf$avg_zscore_r[interaction_scores_rf$OrfRep == gene_sel] <- + sum(x_stats_interaction$zscore_r, na.rm = TRUE) / (total_conc_nums - 1) + interaction_scores_rf$Raw_Shift_auc[interaction_scores_rf$OrfRep == gene_sel] <- + x_stats_interaction$Raw_Shift_auc[1] + interaction_scores_rf$z_shift_auc[interaction_scores_rf$OrfRep == gene_sel] <- + x_stats_interaction$z_shift_auc[1] + interaction_scores_rf$lm_Score_auc[interaction_scores_rf$OrfRep == gene_sel] <- + gene_interaction_auc + interaction_scores_rf$R_Squared_auc[interaction_scores_rf$OrfRep == gene_sel] <- + r_squared_auc + interaction_scores_rf$Sum_z_Score_auc[interaction_scores_rf$OrfRep == gene_sel] <- + sum(x_stats_interaction$zscore_auc, na.rm = TRUE) + 
interaction_scores_rf$avg_zscore_auc[interaction_scores_rf$OrfRep == gene_sel] <- + sum(x_stats_interaction$zscore_auc, na.rm = TRUE) / (total_conc_nums - 1) } - if (X_stats_interaction$mean_L[1] == 0 || is.na(X_stats_interaction$mean_L[1])) { + if (x_stats_interaction$mean_l[1] == 0 || is.na(x_stats_interaction$mean_l[1])) { # Calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC + x_stats_interaction$Exp_l <- x_stats_interaction$WT_l + x_stats_interaction$Raw_Shift_l + x_stats_interaction$Exp_k <- x_stats_interaction$WT_k + x_stats_interaction$Raw_Shift_k + x_stats_interaction$Exp_r <- x_stats_interaction$WT_r + x_stats_interaction$Raw_Shift_r + x_stats_interaction$Exp_auc <- x_stats_interaction$WT_auc + x_stats_interaction$Raw_Shift_auc # Calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <- X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC + x_stats_interaction$delta_l <- x_stats_interaction$mean_l - x_stats_interaction$Exp_l + x_stats_interaction$delta_k <- x_stats_interaction$mean_k - x_stats_interaction$Exp_k + x_stats_interaction$delta_r <- x_stats_interaction$mean_r - x_stats_interaction$Exp_r + x_stats_interaction$delta_auc <- x_stats_interaction$mean_auc - x_stats_interaction$Exp_auc # Disregard shift for missing values in Z score calculation - if (sum(X_stats_interaction$NG, na.rm = TRUE) > 0) { - X_stats_interaction[X_stats_interaction$NG 
== 1, ]$Delta_L <- - X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_K <- - X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_r <- - X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_AUC <- - X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_AUC + if (sum(x_stats_interaction$NG, na.rm = TRUE) > 0) { + x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_l <- + x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_l + x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_k <- + x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_k + x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_r <- + x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_r + x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_auc <- + x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_auc } # Disregard shift for set to max values in Z score calculation - if (sum(X_stats_interaction$SM, na.rm = TRUE) > 0) { - X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_L <- - X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_l + if (sum(x_stats_interaction$SM, na.rm = TRUE) > 0) { + x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_l <- + x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_l - 
x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_l # Only calculate the L value without shift since L is the only adjusted value - # X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_K <- - # X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_K - # X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_r <- - # X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_r - # X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_AUC <- - # X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_AUC + # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_k <- + # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_k + # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_r <- + # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_r + # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_auc <- + # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_auc } # Calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L) / (X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K) / (X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r) / (X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC) / (X_stats_interaction$WT_sd_AUC) + x_stats_interaction$zscore_l <- (x_stats_interaction$delta_l) / (x_stats_interaction$WT_sd_l) + x_stats_interaction$zscore_k <- (x_stats_interaction$delta_k) / (x_stats_interaction$WT_sd_k) + x_stats_interaction$zscore_r <- (x_stats_interaction$delta_r) / (x_stats_interaction$WT_sd_r) + 
x_stats_interaction$zscore_auc <- (x_stats_interaction$delta_auc) / (x_stats_interaction$WT_sd_auc) # NA values for the next part since there's an NA or 0 at the no drug. - gene_lm_L <- NA - gene_lm_K <- NA + gene_lm_l <- NA + gene_lm_k <- NA gene_lm_r <- NA - gene_lm_AUC <- NA - gene_interaction_L <- NA + gene_lm_auc <- NA + gene_interaction_l <- NA r_squared_l <- NA - gene_interaction_K <- NA - r_squared_K <- NA + gene_interaction_k <- NA + r_squared_k <- NA gene_interaction_r <- NA r_squared_r <- NA - gene_interaction_AUC <- NA - r_squared_AUC <- NA - X_stats_interaction$Raw_Shift_L <- NA - X_stats_interaction$Raw_Shift_K <- NA - X_stats_interaction$Raw_Shift_r <- NA - X_stats_interaction$Raw_Shift_AUC <- NA - X_stats_interaction$Z_Shift_L <- NA - X_stats_interaction$Z_Shift_K <- NA - X_stats_interaction$Z_Shift_r <- NA - X_stats_interaction$Z_Shift_AUC <- NA - InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] - InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] - InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L - InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K - 
InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r - InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC - InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + gene_interaction_auc <- NA + r_squared_auc <- NA + x_stats_interaction$Raw_Shift_l <- NA + x_stats_interaction$Raw_Shift_k <- NA + x_stats_interaction$Raw_Shift_r <- NA + x_stats_interaction$Raw_Shift_auc <- NA + x_stats_interaction$z_shift_l <- NA + x_stats_interaction$z_shift_k <- NA + x_stats_interaction$z_shift_r <- NA + x_stats_interaction$z_shift_auc <- NA + interaction_scores_rf$OrfRep[interaction_scores_rf$OrfRep == gene_sel] <- x_gene_sel$OrfRep[1] + interaction_scores_rf$Gene[interaction_scores_rf$OrfRep == gene_sel] <- x_gene_sel$Gene[1] + 
interaction_scores_rf$Raw_Shift_l[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_l[1] + interaction_scores_rf$z_shift_l[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$z_shift_l[1] + interaction_scores_rf$lm_Score_l[interaction_scores_rf$OrfRep == gene_sel] <- gene_interaction_l + interaction_scores_rf$R_Squared_l[interaction_scores_rf$OrfRep == gene_sel] <- r_squared_l + interaction_scores_rf$Sum_z_Score_l[interaction_scores_rf$OrfRep == gene_sel] <- NA + interaction_scores_rf$avg_zscore_l[interaction_scores_rf$OrfRep == gene_sel] <- NA + interaction_scores_rf$Raw_Shift_k[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_k[1] + interaction_scores_rf$z_shift_k[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$z_shift_k[1] + interaction_scores_rf$lm_Score_k[interaction_scores_rf$OrfRep == gene_sel] <- gene_interaction_k + interaction_scores_rf$R_Squared_k[interaction_scores_rf$OrfRep == gene_sel] <- r_squared_k + interaction_scores_rf$Sum_z_Score_k[interaction_scores_rf$OrfRep == gene_sel] <- NA + interaction_scores_rf$avg_zscore_k[interaction_scores_rf$OrfRep == gene_sel] <- NA + interaction_scores_rf$Raw_Shift_r[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_r[1] + interaction_scores_rf$z_shift_r[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$z_shift_r[1] + interaction_scores_rf$lm_Score_r[interaction_scores_rf$OrfRep == gene_sel] <- gene_interaction_r + interaction_scores_rf$R_Squared_r[interaction_scores_rf$OrfRep == gene_sel] <- r_squared_r + interaction_scores_rf$Sum_z_Score_r[interaction_scores_rf$OrfRep == gene_sel] <- NA + interaction_scores_rf$avg_zscore_r[interaction_scores_rf$OrfRep == gene_sel] <- NA + interaction_scores_rf$Raw_Shift_auc[interaction_scores_rf$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_auc[1] + interaction_scores_rf$z_shift_auc[interaction_scores_rf$OrfRep == gene_sel] <- 
x_stats_interaction$z_shift_auc[1] + interaction_scores_rf$lm_Score_auc[interaction_scores_rf$OrfRep == gene_sel] <- gene_interaction_auc + interaction_scores_rf$R_Squared_auc[interaction_scores_rf$OrfRep == gene_sel] <- r_squared_auc + interaction_scores_rf$Sum_z_Score_auc[interaction_scores_rf$OrfRep == gene_sel] <- NA + interaction_scores_rf$avg_zscore_auc[interaction_scores_rf$OrfRep == gene_sel] <- NA } if (i == 1) { - X_stats_interaction_ALL_RF <- X_stats_interaction + x_stats_interaction_all_rf <- x_stats_interaction } if (i > 1) { - X_stats_interaction_ALL_RF <- rbind(X_stats_interaction_ALL_RF, X_stats_interaction) + x_stats_interaction_all_rf <- rbind(x_stats_interaction_all_rf, x_stats_interaction) } - InteractionScores_RF$NG[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG, na.rm = TRUE) - InteractionScores_RF$DB[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB, na.rm = TRUE) - InteractionScores_RF$SM[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM, na.rm = TRUE) + interaction_scores_rf$NG[interaction_scores_rf$OrfRep == gene_sel] <- sum(x_stats_interaction$NG, na.rm = TRUE) + interaction_scores_rf$DB[interaction_scores_rf$OrfRep == gene_sel] <- sum(x_stats_interaction$DB, na.rm = TRUE) + interaction_scores_rf$SM[interaction_scores_rf$OrfRep == gene_sel] <- sum(x_stats_interaction$SM, na.rm = TRUE) - # X_stats_L_int_temp <- rbind(X_stats_L_int_temp, X_stats_L_int) + # x_stats_l_int_temp <- rbind(x_stats_l_int_temp, x_stats_l_int) } print("Pass RF Calculation loop") - lm_sd_L <- sd(InteractionScores_RF$lm_Score_L, na.rm = TRUE) - lm_sd_K <- sd(InteractionScores_RF$lm_Score_K, na.rm = TRUE) - lm_sd_r <- sd(InteractionScores_RF$lm_Score_r, na.rm = TRUE) - lm_sd_AUC <- sd(InteractionScores_RF$lm_Score_AUC, na.rm = TRUE) - lm_mean_L <- mean(InteractionScores_RF$lm_Score_L, na.rm = TRUE) - lm_mean_K <- mean(InteractionScores_RF$lm_Score_K, na.rm = TRUE) - lm_mean_r <- 
mean(InteractionScores_RF$lm_Score_r, na.rm = TRUE) - lm_mean_AUC <- mean(InteractionScores_RF$lm_Score_AUC, na.rm = TRUE) + lm_sd_l <- sd(interaction_scores_rf$lm_Score_l, na.rm = TRUE) + lm_sd_k <- sd(interaction_scores_rf$lm_Score_k, na.rm = TRUE) + lm_sd_r <- sd(interaction_scores_rf$lm_Score_r, na.rm = TRUE) + lm_sd_auc <- sd(interaction_scores_rf$lm_Score_auc, na.rm = TRUE) + lm_mean_l <- mean(interaction_scores_rf$lm_Score_l, na.rm = TRUE) + lm_mean_k <- mean(interaction_scores_rf$lm_Score_k, na.rm = TRUE) + lm_mean_r <- mean(interaction_scores_rf$lm_Score_r, na.rm = TRUE) + lm_mean_auc <- mean(interaction_scores_rf$lm_Score_auc, na.rm = TRUE) - print(paste("Mean RF linear regression score L", lm_mean_L)) + print(paste("Mean RF linear regression score L", lm_mean_l)) - InteractionScores_RF$Z_lm_L <- (InteractionScores_RF$lm_Score_L - lm_mean_L) / (lm_sd_L) - InteractionScores_RF$Z_lm_K <- (InteractionScores_RF$lm_Score_K - lm_mean_K) / (lm_sd_K) - InteractionScores_RF$Z_lm_r <- (InteractionScores_RF$lm_Score_r - lm_mean_r) / (lm_sd_r) - InteractionScores_RF$Z_lm_AUC <- (InteractionScores_RF$lm_Score_AUC - lm_mean_AUC) / (lm_sd_AUC) - InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$Z_lm_L, decreasing = TRUE), ] - InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$NG, decreasing = TRUE), ] - write.csv(InteractionScores_RF, file.path(outDir, "RF_ZScores_Interaction.csv"), row.names = FALSE) + interaction_scores_rf$z_lm_l <- (interaction_scores_rf$lm_Score_l - lm_mean_l) / (lm_sd_l) + interaction_scores_rf$z_lm_k <- (interaction_scores_rf$lm_Score_k - lm_mean_k) / (lm_sd_k) + interaction_scores_rf$z_lm_r <- (interaction_scores_rf$lm_Score_r - lm_mean_r) / (lm_sd_r) + interaction_scores_rf$z_lm_auc <- (interaction_scores_rf$lm_Score_auc - lm_mean_auc) / (lm_sd_auc) + interaction_scores_rf <- interaction_scores_rf[order(interaction_scores_rf$z_lm_l, decreasing = TRUE), ] + interaction_scores_rf <- 
interaction_scores_rf[order(interaction_scores_rf$NG, decreasing = TRUE), ] + write.csv(interaction_scores_rf, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE) - for (i in 1:num_genes_RF) { - Gene_Sel <- unique(InteractionScores_RF$OrfRep)[i] - X_ZCalculations <- X_stats_interaction_ALL_RF[X_stats_interaction_ALL_RF$OrfRep == Gene_Sel, ] - X_Int_Scores <- InteractionScores_RF[InteractionScores_RF$OrfRep == Gene_Sel, ] + for (i in 1:num_genes_rf) { + gene_sel <- unique(interaction_scores_rf$OrfRep)[i] + x_z_calculations <- x_stats_interaction_all_rf[x_stats_interaction_all_rf$OrfRep == gene_sel, ] + df_int_scores <- interaction_scores_rf[interaction_scores_rf$OrfRep == gene_sel, ] - p_rf_l[[i]] <- ggplot(X_ZCalculations, aes(Conc_Num_Factor, Delta_L)) + + p_rf_l[[i]] <- ggplot(x_z_calculations, aes(conc_num_factor, delta_l)) + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + coord_cartesian(ylim = c(-65, 65)) + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_l), ymax = 0 + (2 * WT_sd_l)), alpha = 0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1], X_ZCalculations$Gene[1], sep = " ")) + - annotate("text", x = 1, y = 45, label = paste("ZShift =", round(X_Int_Scores$Z_Shift_L, 2))) + + ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) + + annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$z_shift_l, 2))) + scale_color_discrete(guide = FALSE) + - # annotate("text", x = 1, y = 35, label = paste("Avg Zscore =", round(X_Int_Scores$Avg_Zscore_L, 2))) + - annotate("text", x = 1, y = 25, label = paste("lm Zscore =", round(X_Int_Scores$Z_lm_L, 2))) + - annotate("text", x = 1, y = -25, label = paste("NG =", X_Int_Scores$NG)) + - annotate("text", x = 1, y = -35, label = paste("DB =", X_Int_Scores$DB)) + - annotate("text", x = 1, y = -45, label = paste("SM =", X_Int_Scores$SM)) + + # annotate("text", x = 1, y = 35, label = paste("Avg Zscore =", round(df_int_scores$avg_zscore_l, 2))) + + 
annotate("text", x = 1, y = 25, label = paste("lm Zscore =", round(df_int_scores$z_lm_l, 2))) + + annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) + + annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) + + annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) + scale_x_continuous( - name = unique(X$Drug[1]), - breaks = unique(X_ZCalculations$Conc_Num_Factor), - labels = unique(as.character(X_ZCalculations$Conc_Num)) + name = unique(df$Drug[1]), + breaks = unique(x_z_calculations$conc_num_factor), + labels = unique(as.character(x_z_calculations$conc_num)) ) + scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) + theme_publication() - p_rf_K[[i]] <- ggplot( - X_ZCalculations, aes(Conc_Num_Factor, Delta_K)) + + p_rf_k[[i]] <- ggplot( + x_z_calculations, aes(conc_num_factor, delta_k)) + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + coord_cartesian(ylim = c(-65, 65)) + - geom_errorbar(aes(ymin = 0 - (2 * WT_sd_K), ymax = 0 + (2 * WT_sd_K)), alpha = 0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1], X_ZCalculations$Gene[1], sep = " ")) + - annotate("text", x = 1, y = 45, label = paste("ZShift =", round(X_Int_Scores$Z_Shift_K, 2))) + + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_k), ymax = 0 + (2 * WT_sd_k)), alpha = 0.3) + + ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) + + annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$z_shift_k, 2))) + scale_color_discrete(guide = FALSE) + - # annotate("text", x = 1, y = 35, label = paste("Avg ZScore =", round(X_Int_Scores$Avg_Zscore_K, 2))) + - annotate("text", x = 1, y = 25, label = paste("lm ZScore =", round(X_Int_Scores$Z_lm_L, 2))) + - annotate("text", x = 1, y = -25, label = paste("NG =", X_Int_Scores$NG)) + - annotate("text", x = 1, y = -35, label = paste("DB =", X_Int_Scores$DB)) + - annotate("text", x = 1, y = -45, label = paste("SM =", 
X_Int_Scores$SM)) + + # annotate("text", x = 1, y = 35, label = paste("Avg ZScore =", round(df_int_scores$avg_zscore_k, 2))) + + annotate("text", x = 1, y = 25, label = paste("lm ZScore =", round(df_int_scores$z_lm_l, 2))) + + annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) + + annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) + + annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) + scale_x_continuous( - name = unique(X$Drug[1]), - breaks = unique(X_ZCalculations$Conc_Num_Factor), - labels = unique(as.character(X_ZCalculations$Conc_Num)) + name = unique(df$Drug[1]), + breaks = unique(x_z_calculations$conc_num_factor), + labels = unique(as.character(x_z_calculations$conc_num)) ) + scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) + theme_publication() p_rf_r[[i]] <- ggplot( - X_ZCalculations, aes(Conc_Num_Factor, Delta_r)) + + x_z_calculations, aes(conc_num_factor, delta_r)) + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + coord_cartesian(ylim = c(-0.65, 0.65)) + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_r), ymax = 0 + (2 * WT_sd_r)), alpha = 0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1], X_ZCalculations$Gene[1], sep = " ")) + - annotate("text", x = 1, y = 0.45, label = paste("ZShift =", round(X_Int_Scores$Z_Shift_r, 2))) + + ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) + + annotate("text", x = 1, y = 0.45, label = paste("ZShift =", round(df_int_scores$z_shift_r, 2))) + scale_color_discrete(guide = FALSE) + - # annotate("text", x = 1, y = 0.35, label = paste("Avg ZScore =", round(X_Int_Scores$Avg_Zscore_r, 2))) + - annotate("text", x = 1, y = 0.25, label = paste("lm ZScore =", round(X_Int_Scores$Z_lm_r, 2))) + - annotate("text", x = 1, y = -0.25, label = paste("NG =", X_Int_Scores$NG)) + - annotate("text", x = 1, y = -0.35, label = paste("DB =", X_Int_Scores$DB)) + - annotate("text", x = 1, y = 
-0.45, label = paste("SM =", X_Int_Scores$SM)) + + # annotate("text", x = 1, y = 0.35, label = paste("Avg ZScore =", round(df_int_scores$avg_zscore_r, 2))) + + annotate("text", x = 1, y = 0.25, label = paste("lm ZScore =", round(df_int_scores$z_lm_r, 2))) + + annotate("text", x = 1, y = -0.25, label = paste("NG =", df_int_scores$NG)) + + annotate("text", x = 1, y = -0.35, label = paste("DB =", df_int_scores$DB)) + + annotate("text", x = 1, y = -0.45, label = paste("SM =", df_int_scores$SM)) + scale_x_continuous( - name = unique(X$Drug[1]), - breaks = unique(X_ZCalculations$Conc_Num_Factor), - labels = unique(as.character(X_ZCalculations$Conc_Num)) + name = unique(df$Drug[1]), + breaks = unique(x_z_calculations$conc_num_factor), + labels = unique(as.character(x_z_calculations$conc_num)) ) + scale_y_continuous(breaks = c(-0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6)) + theme_publication() - p_rf_AUC[[i]] <- ggplot( - X_ZCalculations, aes(Conc_Num_Factor, Delta_AUC)) + + p_rf_auc[[i]] <- ggplot( + x_z_calculations, aes(conc_num_factor, delta_auc)) + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + coord_cartesian(ylim = c(-6500, 6500)) + - geom_errorbar(aes(ymin = 0 - (2 * WT_sd_AUC), ymax = 0 + (2 * WT_sd_AUC)), alpha = 0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1], X_ZCalculations$Gene[1], sep = " ")) + - annotate("text", x = 1, y = 4500, label = paste("ZShift =", round(X_Int_Scores$Z_Shift_AUC, 2))) + + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_auc), ymax = 0 + (2 * WT_sd_auc)), alpha = 0.3) + + ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) + + annotate("text", x = 1, y = 4500, label = paste("ZShift =", round(df_int_scores$z_shift_auc, 2))) + scale_color_discrete(guide = FALSE) + - # annotate("text", x = 1, y = 3500, label = paste("Avg ZScore =", round(X_Int_Scores$Avg_Zscore_AUC, 2))) + - annotate("text", x = 1, y = 2500, label = paste("lm ZScore =", round(X_Int_Scores$Z_lm_AUC, 2))) + - annotate("text", x = 1, y = 
-2500, label = paste("NG =", X_Int_Scores$NG)) + - annotate("text", x = 1, y = -3500, label = paste("DB =", X_Int_Scores$DB)) + - annotate("text", x = 1, y = -4500, label = paste("SM =", X_Int_Scores$SM)) + + # annotate("text", x = 1, y = 3500, label = paste("Avg ZScore =", round(df_int_scores$avg_zscore_auc, 2))) + + annotate("text", x = 1, y = 2500, label = paste("lm ZScore =", round(df_int_scores$z_lm_auc, 2))) + + annotate("text", x = 1, y = -2500, label = paste("NG =", df_int_scores$NG)) + + annotate("text", x = 1, y = -3500, label = paste("DB =", df_int_scores$DB)) + + annotate("text", x = 1, y = -4500, label = paste("SM =", df_int_scores$SM)) + scale_x_continuous( - name = unique(X$Drug[1]), - breaks = unique(X_ZCalculations$Conc_Num_Factor), - labels = unique(as.character(X_ZCalculations$Conc_Num)) + name = unique(df$Drug[1]), + breaks = unique(x_z_calculations$conc_num_factor), + labels = unique(as.character(x_z_calculations$conc_num)) ) + scale_y_continuous(breaks = c(-6000, -5000, -4000, -3000, -2000, -1000, 0, 1000, 2000, 3000, 4000, 5000, 6000)) + theme_publication() if (i == 1) { - X_stats_interaction_ALL_RF_final <- X_ZCalculations + x_stats_interaction_all_rf_final <- x_z_calculations } if (i > 1) { - X_stats_interaction_ALL_RF_final <- rbind(X_stats_interaction_ALL_RF_final, X_ZCalculations) + x_stats_interaction_all_rf_final <- rbind(x_stats_interaction_all_rf_final, x_z_calculations) } } print("Pass RF ggplot loop") - write.csv(X_stats_interaction_ALL_RF_final, file.path(outDir, "RF_ZScore_Calculations.csv"), row.names = FALSE) + write.csv(x_stats_interaction_all_rf_final, file.path(out_dir, "rf_zscore_calculations.csv"), row.names = FALSE) # Part 5 - Get Zscores for Gene deletion strains # Get total number of genes for the next loop - num_genes <- length(unique(X2$OrfRep)) + num_genes <- length(unique(df2$OrfRep)) # print(num_genes) # Create the output data.frame containing columns for each deletion strain - InteractionScores <- 
unique(X2["OrfRep"]) - # InteractionScores$Gene <- unique(X2$Gene) - InteractionScores$Gene <- NA - InteractionScores$Raw_Shift_L <- NA - InteractionScores$Z_Shift_L <- NA - InteractionScores$lm_Score_L <- NA - InteractionScores$Z_lm_L <- NA - InteractionScores$R_Squared_L <- NA - InteractionScores$Sum_Z_Score_L <- NA - InteractionScores$Avg_Zscore_L <- NA - InteractionScores$Raw_Shift_K <- NA - InteractionScores$Z_Shift_K <- NA - InteractionScores$lm_Score_K <- NA - InteractionScores$Z_lm_K <- NA - InteractionScores$R_Squared_K <- NA - InteractionScores$Sum_Z_Score_K <- NA - InteractionScores$Avg_Zscore_K <- NA - InteractionScores$Raw_Shift_r <- NA - InteractionScores$Z_Shift_r <- NA - InteractionScores$lm_Score_r <- NA - InteractionScores$Z_lm_r <- NA - InteractionScores$R_Squared_r <- NA - InteractionScores$Sum_Z_Score_r <- NA - InteractionScores$Avg_Zscore_r <- NA - InteractionScores$Raw_Shift_AUC <- NA - InteractionScores$Z_Shift_AUC <- NA - InteractionScores$lm_Score_AUC <- NA - InteractionScores$Z_lm_AUC <- NA - InteractionScores$R_Squared_AUC <- NA - InteractionScores$Sum_Z_Score_AUC <- NA - InteractionScores$Avg_Zscore_AUC <- NA - InteractionScores$NG <- NA - InteractionScores$DB <- NA - InteractionScores$SM <- NA + interaction_scores <- unique(df2["OrfRep"]) + # interaction_scores$Gene <- unique(df2$Gene) + interaction_scores$Gene <- NA + interaction_scores$Raw_Shift_l <- NA + interaction_scores$z_shift_l <- NA + interaction_scores$lm_Score_l <- NA + interaction_scores$z_lm_l <- NA + interaction_scores$R_Squared_l <- NA + interaction_scores$Sum_z_Score_l <- NA + interaction_scores$avg_zscore_l <- NA + interaction_scores$Raw_Shift_k <- NA + interaction_scores$z_shift_k <- NA + interaction_scores$lm_Score_k <- NA + interaction_scores$z_lm_k <- NA + interaction_scores$R_Squared_k <- NA + interaction_scores$Sum_z_Score_k <- NA + interaction_scores$avg_zscore_k <- NA + interaction_scores$Raw_Shift_r <- NA + interaction_scores$z_shift_r <- NA + 
interaction_scores$lm_Score_r <- NA + interaction_scores$z_lm_r <- NA + interaction_scores$R_Squared_r <- NA + interaction_scores$Sum_z_Score_r <- NA + interaction_scores$avg_zscore_r <- NA + interaction_scores$Raw_Shift_auc <- NA + interaction_scores$z_shift_auc <- NA + interaction_scores$lm_Score_auc <- NA + interaction_scores$z_lm_auc <- NA + interaction_scores$R_Squared_auc <- NA + interaction_scores$Sum_z_Score_auc <- NA + interaction_scores$avg_zscore_auc <- NA + interaction_scores$NG <- NA + interaction_scores$DB <- NA + interaction_scores$SM <- NA for (i in 1:num_genes) { # Get each deletion strain ORF - Gene_Sel <- unique(X2$OrfRep)[i] + gene_sel <- unique(df2$OrfRep)[i] # Extract only the current deletion strain and its data - X_Gene_Sel <- X2[X2$OrfRep == Gene_Sel, ] + x_gene_sel <- df2[df2$OrfRep == gene_sel, ] - X_stats_interaction <- ddply( - X_Gene_Sel, - c("OrfRep", "Gene", "Conc_Num", "Conc_Num_Factor"), + x_stats_interaction <- ddply( + x_gene_sel, + c("OrfRep", "Gene", "conc_num", "conc_num_factor"), summarise, N = (length(l)), - mean_L = mean(l, na.rm = TRUE), - median_L = median(l, na.rm = TRUE), - sd_L = sd(l, na.rm = TRUE), - se_L = sd_L / sqrt(N - 1), - mean_K = mean(K, na.rm = TRUE), - median_K = median(K, na.rm = TRUE), - sd_K = sd(K, na.rm = TRUE), - se_K = sd_K / sqrt(N - 1), + mean_l = mean(l, na.rm = TRUE), + median_l = median(l, na.rm = TRUE), + sd_l = sd(l, na.rm = TRUE), + se_l = sd_l / sqrt(N - 1), + mean_k = mean(k, na.rm = TRUE), + median_k = median(k, na.rm = TRUE), + sd_k = sd(k, na.rm = TRUE), + se_k = sd_k / sqrt(N - 1), mean_r = mean(r, na.rm = TRUE), median_r = median(r, na.rm = TRUE), sd_r = sd(r, na.rm = TRUE), se_r = sd_r / sqrt(N - 1), - mean_AUC = mean(AUC, na.rm = TRUE), - median_AUC = median(AUC, na.rm = TRUE), - sd_AUC = sd(AUC, na.rm = TRUE), - se_AUC = sd_AUC / sqrt(N - 1), + mean_auc = mean(auc, na.rm = TRUE), + median_auc = median(auc, na.rm = TRUE), + sd_auc = sd(auc, na.rm = TRUE), + se_auc = sd_auc / sqrt(N - 
1), NG = sum(NG, na.rm = TRUE), DB = sum(DB, na.rm = TRUE), SM = sum(SM, na.rm = TRUE) @@ -1633,526 +1594,526 @@ for (s in Background_Strains) { # if L is NA at 0, that means the spot was removed due to contamination # if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift # otherwise calculate shift at no drug conc - if (is.na(X_stats_interaction$mean_L[1]) || X_stats_interaction$mean_L[1] == 0) { - X_stats_interaction$Raw_Shift_L <- 0 - X_stats_interaction$Raw_Shift_K <- 0 - X_stats_interaction$Raw_Shift_r <- 0 - X_stats_interaction$Raw_Shift_AUC <- 0 - X_stats_interaction$Z_Shift_L <- 0 - X_stats_interaction$Z_Shift_K <- 0 - X_stats_interaction$Z_Shift_r <- 0 - X_stats_interaction$Z_Shift_AUC <- 0 + if (is.na(x_stats_interaction$mean_l[1]) || x_stats_interaction$mean_l[1] == 0) { + x_stats_interaction$Raw_Shift_l <- 0 + x_stats_interaction$Raw_Shift_k <- 0 + x_stats_interaction$Raw_Shift_r <- 0 + x_stats_interaction$Raw_Shift_auc <- 0 + x_stats_interaction$z_shift_l <- 0 + x_stats_interaction$z_shift_k <- 0 + x_stats_interaction$z_shift_r <- 0 + x_stats_interaction$z_shift_auc <- 0 } else { - X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L - X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K - X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r - X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC - X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1] / X_stats_BY_L$sd[1] - X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1] / X_stats_BY_K$sd[1] - X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1] / X_stats_BY_r$sd[1] - X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1] / X_stats_BY_AUC$sd[1] + x_stats_interaction$Raw_Shift_l <- x_stats_interaction$mean_l[1] - background_l + x_stats_interaction$Raw_Shift_k <- x_stats_interaction$mean_k[1] - 
background_k + x_stats_interaction$Raw_Shift_r <- x_stats_interaction$mean_r[1] - background_r + x_stats_interaction$Raw_Shift_auc <- x_stats_interaction$mean_auc[1] - background_auc + x_stats_interaction$z_shift_l <- x_stats_interaction$Raw_Shift_l[1] / x_stats_by_l$sd[1] + x_stats_interaction$z_shift_k <- x_stats_interaction$Raw_Shift_k[1] / x_stats_by_k$sd[1] + x_stats_interaction$z_shift_r <- x_stats_interaction$Raw_Shift_r[1] / x_stats_by_r$sd[1] + x_stats_interaction$z_shift_auc <- x_stats_interaction$Raw_Shift_auc[1] / x_stats_by_auc$sd[1] } # Get WT vals - X_stats_interaction$WT_l <- X_stats_BY_L$mean - X_stats_interaction$WT_K <- X_stats_BY_K$mean - X_stats_interaction$WT_r <- X_stats_BY_r$mean - X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean + x_stats_interaction$WT_l <- x_stats_by_l$mean + x_stats_interaction$WT_k <- x_stats_by_k$mean + x_stats_interaction$WT_r <- x_stats_by_r$mean + x_stats_interaction$WT_auc <- x_stats_by_auc$mean # Get WT SD - X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd - X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd - X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd - X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd + x_stats_interaction$WT_sd_l <- x_stats_by_l$sd + x_stats_interaction$WT_sd_k <- x_stats_by_k$sd + x_stats_interaction$WT_sd_r <- x_stats_by_r$sd + x_stats_interaction$WT_sd_auc <- x_stats_by_auc$sd # Only get scores if there's growth at no drug - if (X_stats_interaction$mean_L[1] != 0 && !is.na(X_stats_interaction$mean_L[1])) { + if (x_stats_interaction$mean_l[1] != 0 && !is.na(x_stats_interaction$mean_l[1])) { # Calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC + 
x_stats_interaction$Exp_l <- x_stats_interaction$WT_l + x_stats_interaction$Raw_Shift_l + x_stats_interaction$Exp_k <- x_stats_interaction$WT_k + x_stats_interaction$Raw_Shift_k + x_stats_interaction$Exp_r <- x_stats_interaction$WT_r + x_stats_interaction$Raw_Shift_r + x_stats_interaction$Exp_auc <- x_stats_interaction$WT_auc + x_stats_interaction$Raw_Shift_auc # Calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <- X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC + x_stats_interaction$delta_l <- x_stats_interaction$mean_l - x_stats_interaction$Exp_l + x_stats_interaction$delta_k <- x_stats_interaction$mean_k - x_stats_interaction$Exp_k + x_stats_interaction$delta_r <- x_stats_interaction$mean_r - x_stats_interaction$Exp_r + x_stats_interaction$delta_auc <- x_stats_interaction$mean_auc - x_stats_interaction$Exp_auc # Disregard shift for no growth values in Z score calculation - if (sum(X_stats_interaction$NG, na.rm = TRUE) > 0) { - X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_L <- - X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_K <- - X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_r <- - X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_AUC <- - X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_AUC + if 
(sum(x_stats_interaction$NG, na.rm = TRUE) > 0) { + x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_l <- + x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_l + x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_k <- + x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_k + x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_r <- + x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_r + x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_auc <- + x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_auc } # Disregard shift for set to max values in Z score calculation - if (sum(X_stats_interaction$SM, na.rm = TRUE) > 0) { - X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_L <- - X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_l + if (sum(x_stats_interaction$SM, na.rm = TRUE) > 0) { + x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_l <- + x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_l # Only calculate the L value without shift since L is the only adjusted value - # X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_K <- - # X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_K - # X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_r <- - # X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_r - # X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_AUC <- - # X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_AUC + # 
x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_k <- + # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_k + # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_r <- + # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_r + # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_auc <- + # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_auc } # Calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L) / (X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K) / (X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r) / (X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC) / (X_stats_interaction$WT_sd_AUC) + x_stats_interaction$zscore_l <- (x_stats_interaction$delta_l) / (x_stats_interaction$WT_sd_l) + x_stats_interaction$zscore_k <- (x_stats_interaction$delta_k) / (x_stats_interaction$WT_sd_k) + x_stats_interaction$zscore_r <- (x_stats_interaction$delta_r) / (x_stats_interaction$WT_sd_r) + x_stats_interaction$zscore_auc <- (x_stats_interaction$delta_auc) / (x_stats_interaction$WT_sd_auc) # Get linear model - gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor, data = X_stats_interaction) - gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor, data = X_stats_interaction) - gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor, data = X_stats_interaction) - gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor, data = X_stats_interaction) + gene_lm_l <- lm(formula = delta_l ~ conc_num_factor, data = x_stats_interaction) + gene_lm_k <- lm(formula = delta_k ~ conc_num_factor, data = x_stats_interaction) + gene_lm_r <- lm(formula = delta_r ~ conc_num_factor, data = x_stats_interaction) + 
gene_lm_auc <- lm(formula = delta_auc ~ conc_num_factor, data = x_stats_interaction) # Get interaction score calculated by linear model and R-squared value for the fit - gene_interaction_L <- MAX_CONC * (gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] - r_squared_l <- summary(gene_lm_L)$r.squared - gene_interaction_K <- MAX_CONC * (gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] - r_squared_K <- summary(gene_lm_K)$r.squared - gene_interaction_r <- MAX_CONC * (gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] + gene_interaction_l <- max_conc * (gene_lm_l$coefficients[2]) + gene_lm_l$coefficients[1] + r_squared_l <- summary(gene_lm_l)$r.squared + gene_interaction_k <- max_conc * (gene_lm_k$coefficients[2]) + gene_lm_k$coefficients[1] + r_squared_k <- summary(gene_lm_k)$r.squared + gene_interaction_r <- max_conc * (gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] r_squared_r <- summary(gene_lm_r)$r.squared - gene_interaction_AUC <- MAX_CONC * (gene_lm_r$coefficients[2]) + gene_lm_AUC$coefficients[1] - r_squared_AUC <- summary(gene_lm_AUC)$r.squared + gene_interaction_auc <- max_conc * (gene_lm_auc$coefficients[2]) + gene_lm_auc$coefficients[1] + r_squared_auc <- summary(gene_lm_auc)$r.squared # Get total of non removed values - Num_non_Removed_Conc <- total_conc_nums - sum(X_stats_interaction$DB, na.rm = TRUE) - 1 + num_non_removed_conc <- total_conc_nums - sum(x_stats_interaction$DB, na.rm = TRUE) - 1 # Report the scores - InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- - as.character(X_Gene_Sel$OrfRep[1]) - InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- - as.character(X_Gene_Sel$Gene[1]) - InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- - X_stats_interaction$Raw_Shift_L[1] - InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- - X_stats_interaction$Z_Shift_L[1] - InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- - gene_interaction_L - 
InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- - (gene_interaction_L - lm_mean_L) / lm_sd_L - InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- + interaction_scores$OrfRep[interaction_scores$OrfRep == gene_sel] <- + as.character(x_gene_sel$OrfRep[1]) + interaction_scores$Gene[interaction_scores$OrfRep == gene_sel] <- + as.character(x_gene_sel$Gene[1]) + interaction_scores$Raw_Shift_l[interaction_scores$OrfRep == gene_sel] <- + x_stats_interaction$Raw_Shift_l[1] + interaction_scores$z_shift_l[interaction_scores$OrfRep == gene_sel] <- + x_stats_interaction$z_shift_l[1] + interaction_scores$lm_Score_l[interaction_scores$OrfRep == gene_sel] <- + gene_interaction_l + interaction_scores$z_lm_l[interaction_scores$OrfRep == gene_sel] <- + (gene_interaction_l - lm_mean_l) / lm_sd_l + interaction_scores$R_Squared_l[interaction_scores$OrfRep == gene_sel] <- r_squared_l - InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- - sum(X_stats_interaction$Zscore_L, na.rm = TRUE) - InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- - sum(X_stats_interaction$Zscore_L, na.rm = TRUE) / (Num_non_Removed_Conc) - InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- - X_stats_interaction$Raw_Shift_K[1] - InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- - X_stats_interaction$Z_Shift_K[1] - InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- - gene_interaction_K - InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <- - (gene_interaction_K - lm_mean_K) / lm_sd_K - InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- - r_squared_K - InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- - sum(X_stats_interaction$Zscore_K, na.rm = TRUE) - InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- - sum(X_stats_interaction$Zscore_K, na.rm = TRUE) / (Num_non_Removed_Conc) - 
InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- - X_stats_interaction$Raw_Shift_r[1] - InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- - X_stats_interaction$Z_Shift_r[1] - InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- + interaction_scores$Sum_z_Score_l[interaction_scores$OrfRep == gene_sel] <- + sum(x_stats_interaction$zscore_l, na.rm = TRUE) + interaction_scores$avg_zscore_l[interaction_scores$OrfRep == gene_sel] <- + sum(x_stats_interaction$zscore_l, na.rm = TRUE) / (num_non_removed_conc) + interaction_scores$Raw_Shift_k[interaction_scores$OrfRep == gene_sel] <- + x_stats_interaction$Raw_Shift_k[1] + interaction_scores$z_shift_k[interaction_scores$OrfRep == gene_sel] <- + x_stats_interaction$z_shift_k[1] + interaction_scores$lm_Score_k[interaction_scores$OrfRep == gene_sel] <- + gene_interaction_k + interaction_scores$z_lm_k[interaction_scores$OrfRep == gene_sel] <- + (gene_interaction_k - lm_mean_k) / lm_sd_k + interaction_scores$R_Squared_k[interaction_scores$OrfRep == gene_sel] <- + r_squared_k + interaction_scores$Sum_z_Score_k[interaction_scores$OrfRep == gene_sel] <- + sum(x_stats_interaction$zscore_k, na.rm = TRUE) + interaction_scores$avg_zscore_k[interaction_scores$OrfRep == gene_sel] <- + sum(x_stats_interaction$zscore_k, na.rm = TRUE) / (num_non_removed_conc) + interaction_scores$Raw_Shift_r[interaction_scores$OrfRep == gene_sel] <- + x_stats_interaction$Raw_Shift_r[1] + interaction_scores$z_shift_r[interaction_scores$OrfRep == gene_sel] <- + x_stats_interaction$z_shift_r[1] + interaction_scores$lm_Score_r[interaction_scores$OrfRep == gene_sel] <- gene_interaction_r - InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- + interaction_scores$z_lm_r[interaction_scores$OrfRep == gene_sel] <- (gene_interaction_r - lm_mean_r) / lm_sd_r - InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- + interaction_scores$R_Squared_r[interaction_scores$OrfRep == gene_sel] 
<- r_squared_r - InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- - sum(X_stats_interaction$Zscore_r, na.rm = TRUE) - InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- - sum(X_stats_interaction$Zscore_r, na.rm = TRUE) / (total_conc_nums - 1) - InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- - X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- - X_stats_interaction$Z_Shift_AUC[1] - InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- - gene_interaction_AUC - InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- - (gene_interaction_AUC - lm_mean_AUC) / lm_sd_AUC - InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- - r_squared_AUC - InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- - sum(X_stats_interaction$Zscore_AUC, na.rm = TRUE) - InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- - sum(X_stats_interaction$Zscore_AUC, na.rm = TRUE) / (total_conc_nums - 1) + interaction_scores$Sum_z_Score_r[interaction_scores$OrfRep == gene_sel] <- + sum(x_stats_interaction$zscore_r, na.rm = TRUE) + interaction_scores$avg_zscore_r[interaction_scores$OrfRep == gene_sel] <- + sum(x_stats_interaction$zscore_r, na.rm = TRUE) / (total_conc_nums - 1) + interaction_scores$Raw_Shift_auc[interaction_scores$OrfRep == gene_sel] <- + x_stats_interaction$Raw_Shift_auc[1] + interaction_scores$z_shift_auc[interaction_scores$OrfRep == gene_sel] <- + x_stats_interaction$z_shift_auc[1] + interaction_scores$lm_Score_auc[interaction_scores$OrfRep == gene_sel] <- + gene_interaction_auc + interaction_scores$z_lm_auc[interaction_scores$OrfRep == gene_sel] <- + (gene_interaction_auc - lm_mean_auc) / lm_sd_auc + interaction_scores$R_Squared_auc[interaction_scores$OrfRep == gene_sel] <- + r_squared_auc + interaction_scores$Sum_z_Score_auc[interaction_scores$OrfRep == gene_sel] 
<- + sum(x_stats_interaction$zscore_auc, na.rm = TRUE) + interaction_scores$avg_zscore_auc[interaction_scores$OrfRep == gene_sel] <- + sum(x_stats_interaction$zscore_auc, na.rm = TRUE) / (total_conc_nums - 1) } - if (X_stats_interaction$mean_L[1] == 0 || is.na(X_stats_interaction$mean_L[1])) { + if (x_stats_interaction$mean_l[1] == 0 || is.na(x_stats_interaction$mean_l[1])) { # Calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC + x_stats_interaction$Exp_l <- x_stats_interaction$WT_l + x_stats_interaction$Raw_Shift_l + x_stats_interaction$Exp_k <- x_stats_interaction$WT_k + x_stats_interaction$Raw_Shift_k + x_stats_interaction$Exp_r <- x_stats_interaction$WT_r + x_stats_interaction$Raw_Shift_r + x_stats_interaction$Exp_auc <- x_stats_interaction$WT_auc + x_stats_interaction$Raw_Shift_auc # Calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <- X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC + x_stats_interaction$delta_l <- x_stats_interaction$mean_l - x_stats_interaction$Exp_l + x_stats_interaction$delta_k <- x_stats_interaction$mean_k - x_stats_interaction$Exp_k + x_stats_interaction$delta_r <- x_stats_interaction$mean_r - x_stats_interaction$Exp_r + x_stats_interaction$delta_auc <- x_stats_interaction$mean_auc - x_stats_interaction$Exp_auc # Disregard shift for missing values in Z score calculatiom - if (sum(X_stats_interaction$NG, na.rm = TRUE) 
> 0) { - X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_L <- - X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_K <- - X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_r <- - X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1, ]$Delta_AUC <- - X_stats_interaction[X_stats_interaction$NG == 1, ]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1, ]$WT_AUC + if (sum(x_stats_interaction$NG, na.rm = TRUE) > 0) { + x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_l <- + x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_l - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_l + x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_k <- + x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_k + x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_r <- + x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_r + x_stats_interaction[x_stats_interaction$NG == 1, ]$delta_auc <- + x_stats_interaction[x_stats_interaction$NG == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$NG == 1, ]$WT_auc } # Disregard shift for set to max values in Z score calculation - if (sum(X_stats_interaction$SM, na.rm = TRUE) > 0) { - X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_L <- - X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_l + if (sum(x_stats_interaction$SM, na.rm = TRUE) > 0) { + x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_l <- + x_stats_interaction[x_stats_interaction$SM == 
1, ]$mean_l - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_l # Only calculate the L value without shift since L is the only adjusted value - # X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_K <- - # X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_K - # X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_r <- - # X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_r - # X_stats_interaction[X_stats_interaction$SM == 1, ]$Delta_AUC <- - # X_stats_interaction[X_stats_interaction$SM == 1, ]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1, ]$WT_AUC + # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_k <- + # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_k - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_k + # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_r <- + # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_r - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_r + # x_stats_interaction[x_stats_interaction$SM == 1, ]$delta_auc <- + # x_stats_interaction[x_stats_interaction$SM == 1, ]$mean_auc - x_stats_interaction[x_stats_interaction$SM == 1, ]$WT_auc } # Calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L) / (X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K) / (X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r) / (X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC) / (X_stats_interaction$WT_sd_AUC) + x_stats_interaction$zscore_l <- (x_stats_interaction$delta_l) / (x_stats_interaction$WT_sd_l) + x_stats_interaction$zscore_k <- (x_stats_interaction$delta_k) / (x_stats_interaction$WT_sd_k) + x_stats_interaction$zscore_r <- (x_stats_interaction$delta_r) / (x_stats_interaction$WT_sd_r) + 
x_stats_interaction$zscore_auc <- (x_stats_interaction$delta_auc) / (x_stats_interaction$WT_sd_auc) # NA values for the next part since there's an NA or 0 at the no drug. - gene_lm_L <- NA - gene_lm_K <- NA + gene_lm_l <- NA + gene_lm_k <- NA gene_lm_r <- NA - gene_interaction_L <- NA + gene_interaction_l <- NA r_squared_l <- NA - gene_interaction_K <- NA - r_squared_K <- NA + gene_interaction_k <- NA + r_squared_k <- NA gene_interaction_r <- NA r_squared_r <- NA - X_stats_interaction$Raw_Shift_L <- NA - X_stats_interaction$Raw_Shift_K <- NA - X_stats_interaction$Raw_Shift_r <- NA - X_stats_interaction$Raw_Shift_AUC <- NA - X_stats_interaction$Z_Shift_L <- NA - X_stats_interaction$Z_Shift_K <- NA - X_stats_interaction$Z_Shift_r <- NA - X_stats_interaction$Z_Shift_AUC <- NA - InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1]) - InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1]) - InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$R_Squared_K[InteractionScores$OrfRep 
== Gene_Sel] <- r_squared_K - InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA + x_stats_interaction$Raw_Shift_l <- NA + x_stats_interaction$Raw_Shift_k <- NA + x_stats_interaction$Raw_Shift_r <- NA + x_stats_interaction$Raw_Shift_auc <- NA + x_stats_interaction$z_shift_l <- NA + x_stats_interaction$z_shift_k <- NA + x_stats_interaction$z_shift_r <- NA + x_stats_interaction$z_shift_auc <- NA + interaction_scores$OrfRep[interaction_scores$OrfRep == gene_sel] <- as.character(x_gene_sel$OrfRep[1]) + interaction_scores$Gene[interaction_scores$OrfRep == gene_sel] <- as.character(x_gene_sel$Gene[1]) + interaction_scores$Raw_Shift_l[interaction_scores$OrfRep == gene_sel] <- 
x_stats_interaction$Raw_Shift_l[1] + interaction_scores$z_shift_l[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$z_shift_l[1] + interaction_scores$lm_Score_l[interaction_scores$OrfRep == gene_sel] <- NA + interaction_scores$z_lm_l[interaction_scores$OrfRep == gene_sel] <- NA + interaction_scores$R_Squared_l[interaction_scores$OrfRep == gene_sel] <- r_squared_l + interaction_scores$Sum_z_Score_l[interaction_scores$OrfRep == gene_sel] <- NA + interaction_scores$avg_zscore_l[interaction_scores$OrfRep == gene_sel] <- NA + interaction_scores$Raw_Shift_k[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_k[1] + interaction_scores$z_shift_k[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$z_shift_k[1] + interaction_scores$lm_Score_k[interaction_scores$OrfRep == gene_sel] <- NA + interaction_scores$z_lm_k[interaction_scores$OrfRep == gene_sel] <- NA + interaction_scores$R_Squared_k[interaction_scores$OrfRep == gene_sel] <- r_squared_k + interaction_scores$Sum_z_Score_k[interaction_scores$OrfRep == gene_sel] <- NA + interaction_scores$avg_zscore_k[interaction_scores$OrfRep == gene_sel] <- NA + interaction_scores$Raw_Shift_r[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_r[1] + interaction_scores$z_shift_r[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$z_shift_r[1] + interaction_scores$lm_Score_r[interaction_scores$OrfRep == gene_sel] <- NA + interaction_scores$z_lm_r[interaction_scores$OrfRep == gene_sel] <- NA + interaction_scores$R_Squared_r[interaction_scores$OrfRep == gene_sel] <- r_squared_r + interaction_scores$Sum_z_Score_r[interaction_scores$OrfRep == gene_sel] <- NA + interaction_scores$avg_zscore_r[interaction_scores$OrfRep == gene_sel] <- NA + interaction_scores$Raw_Shift_auc[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$Raw_Shift_auc[1] + interaction_scores$z_shift_auc[interaction_scores$OrfRep == gene_sel] <- x_stats_interaction$z_shift_auc[1] + 
interaction_scores$lm_Score_auc[interaction_scores$OrfRep == gene_sel] <- NA + interaction_scores$z_lm_auc[interaction_scores$OrfRep == gene_sel] <- NA + interaction_scores$R_Squared_auc[interaction_scores$OrfRep == gene_sel] <- NA + interaction_scores$Sum_z_Score_auc[interaction_scores$OrfRep == gene_sel] <- NA + interaction_scores$avg_zscore_auc[interaction_scores$OrfRep == gene_sel] <- NA } if (i == 1) { - X_stats_interaction_ALL <- X_stats_interaction + x_stats_interaction_all <- x_stats_interaction } if (i > 1) { - X_stats_interaction_ALL <- rbind(X_stats_interaction_ALL, X_stats_interaction) + x_stats_interaction_all <- rbind(x_stats_interaction_all, x_stats_interaction) } - InteractionScores$NG[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG, na.rm = TRUE) - InteractionScores$DB[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB, na.rm = TRUE) - InteractionScores$SM[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM, na.rm = TRUE) + interaction_scores$NG[interaction_scores$OrfRep == gene_sel] <- sum(x_stats_interaction$NG, na.rm = TRUE) + interaction_scores$DB[interaction_scores$OrfRep == gene_sel] <- sum(x_stats_interaction$DB, na.rm = TRUE) + interaction_scores$SM[interaction_scores$OrfRep == gene_sel] <- sum(x_stats_interaction$SM, na.rm = TRUE) - # X_stats_L_int_temp <- rbind(X_stats_L_int_temp, X_stats_L_int) + # x_stats_l_int_temp <- rbind(x_stats_l_int_temp, x_stats_l_int) } print("Pass Int Calculation loop") - InteractionScores <- InteractionScores[order(InteractionScores$Z_lm_L, decreasing = TRUE), ] - InteractionScores <- InteractionScores[order(InteractionScores$NG, decreasing = TRUE), ] - df_order_by_OrfRep <- unique(InteractionScores$OrfRep) - # X_stats_interaction_ALL <- X_stats_interaction_ALL[order(X_stats_interaction_ALL$NG, decreasing = TRUE), ] - write.csv(InteractionScores, file.path(outDir, "ZScores_Interaction.csv"), row.names = FALSE) + interaction_scores <- 
interaction_scores[order(interaction_scores$z_lm_l, decreasing = TRUE), ] + interaction_scores <- interaction_scores[order(interaction_scores$NG, decreasing = TRUE), ] + df_order_by_OrfRep <- unique(interaction_scores$OrfRep) + # x_stats_interaction_all <- x_stats_interaction_all[order(x_stats_interaction_all$NG, decreasing = TRUE), ] + write.csv(interaction_scores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE) - InteractionScores_deletion_enhancers_L <- - InteractionScores[InteractionScores$Avg_Zscore_L >= 2, ] - InteractionScores_deletion_enhancers_K <- - InteractionScores[InteractionScores$Avg_Zscore_K <= -2, ] - InteractionScores_deletion_suppressors_L <- - InteractionScores[InteractionScores$Avg_Zscore_L <= -2, ] - InteractionScores_deletion_suppressors_K <- - InteractionScores[InteractionScores$Avg_Zscore_K >= 2, ] - InteractionScores_deletion_enhancers_and_Suppressors_L <- - InteractionScores[InteractionScores$Avg_Zscore_L >= 2 | InteractionScores$Avg_Zscore_L <= -2, ] - InteractionScores_deletion_enhancers_and_Suppressors_K <- - InteractionScores[InteractionScores$Avg_Zscore_K >= 2 | InteractionScores$Avg_Zscore_K <= -2, ] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- - InteractionScores[InteractionScores$Z_lm_L >= 2 & InteractionScores$Avg_Zscore_L <= -2, ] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- - InteractionScores[InteractionScores$Z_lm_L <= -2 & InteractionScores$Avg_Zscore_L >= 2, ] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- - InteractionScores[InteractionScores$Z_lm_K <= -2 & InteractionScores$Avg_Zscore_K >= 2, ] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- - InteractionScores[InteractionScores$Z_lm_K >= 2 & InteractionScores$Avg_Zscore_K <= -2, ] - InteractionScores_deletion_enhancers_L <- - InteractionScores_deletion_enhancers_L[ - !is.na(InteractionScores_deletion_enhancers_L$OrfRep), ] - 
InteractionScores_deletion_enhancers_K <- - InteractionScores_deletion_enhancers_K[ - !is.na(InteractionScores_deletion_enhancers_K$OrfRep), ] - InteractionScores_deletion_suppressors_L <- - InteractionScores_deletion_suppressors_L[ - !is.na(InteractionScores_deletion_suppressors_L$OrfRep), ] - InteractionScores_deletion_suppressors_K <- - InteractionScores_deletion_suppressors_K[ - !is.na(InteractionScores_deletion_suppressors_K$OrfRep), ] - InteractionScores_deletion_enhancers_and_Suppressors_L <- - InteractionScores_deletion_enhancers_and_Suppressors_L[ - !is.na(InteractionScores_deletion_enhancers_and_Suppressors_L$OrfRep), ] - InteractionScores_deletion_enhancers_and_Suppressors_K <- - InteractionScores_deletion_enhancers_and_Suppressors_K[ - !is.na(InteractionScores_deletion_enhancers_and_Suppressors_K$OrfRep), ] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L[ - !is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L$OrfRep), ] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L[ - !is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L$OrfRep), ] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K[ - !is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K$OrfRep), ] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K[ - !is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K$OrfRep), ] - write.csv(InteractionScores_deletion_enhancers_L, - file.path(outDir, "ZScores_Interaction_DeletionEnhancers_L.csv"), row.names = FALSE) - write.csv(InteractionScores_deletion_enhancers_K, - file.path(outDir, "ZScores_Interaction_DeletionEnhancers_K.csv"), row.names = FALSE) - 
write.csv(InteractionScores_deletion_suppressors_L, - file.path(outDir, "ZScores_Interaction_DeletionSuppressors_L.csv"), row.names = FALSE) - write.csv(InteractionScores_deletion_suppressors_K, - file.path(outDir, "ZScores_Interaction_DeletionSuppressors_K.csv"), row.names = FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L, - file.path(outDir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv"), row.names = FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K, - file.path(outDir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv"), row.names = FALSE) - write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L, - file.path(outDir, "ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv"), row.names = FALSE) - write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L, - file.path(outDir, "ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv"), row.names = FALSE) - write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K, - file.path(outDir, "ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv"), row.names = FALSE) - write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K, - file.path(outDir, "ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv"), row.names = FALSE) + interaction_scores_deletion_enhancers_l <- + interaction_scores[interaction_scores$avg_zscore_l >= 2, ] + interaction_scores_deletion_enhancers_k <- + interaction_scores[interaction_scores$avg_zscore_k <= -2, ] + interaction_scores_deletion_suppressors_l <- + interaction_scores[interaction_scores$avg_zscore_l <= -2, ] + interaction_scores_deletion_suppressors_k <- + interaction_scores[interaction_scores$avg_zscore_k >= 2, ] + interaction_scores_deletion_enhancers_and_suppressors_l <- + interaction_scores[interaction_scores$avg_zscore_l >= 2 | interaction_scores$avg_zscore_l <= -2, ] + interaction_scores_deletion_enhancers_and_suppressors_k <- + 
interaction_scores[interaction_scores$avg_zscore_k >= 2 | interaction_scores$avg_zscore_k <= -2, ] + interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_l <- + interaction_scores[interaction_scores$z_lm_l >= 2 & interaction_scores$avg_zscore_l <= -2, ] + interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_l <- + interaction_scores[interaction_scores$z_lm_l <= -2 & interaction_scores$avg_zscore_l >= 2, ] + interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_k <- + interaction_scores[interaction_scores$z_lm_k <= -2 & interaction_scores$avg_zscore_k >= 2, ] + interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_k <- + interaction_scores[interaction_scores$z_lm_k >= 2 & interaction_scores$avg_zscore_k <= -2, ] + interaction_scores_deletion_enhancers_l <- + interaction_scores_deletion_enhancers_l[ + !is.na(interaction_scores_deletion_enhancers_l$OrfRep), ] + interaction_scores_deletion_enhancers_k <- + interaction_scores_deletion_enhancers_k[ + !is.na(interaction_scores_deletion_enhancers_k$OrfRep), ] + interaction_scores_deletion_suppressors_l <- + interaction_scores_deletion_suppressors_l[ + !is.na(interaction_scores_deletion_suppressors_l$OrfRep), ] + interaction_scores_deletion_suppressors_k <- + interaction_scores_deletion_suppressors_k[ + !is.na(interaction_scores_deletion_suppressors_k$OrfRep), ] + interaction_scores_deletion_enhancers_and_suppressors_l <- + interaction_scores_deletion_enhancers_and_suppressors_l[ + !is.na(interaction_scores_deletion_enhancers_and_suppressors_l$OrfRep), ] + interaction_scores_deletion_enhancers_and_suppressors_k <- + interaction_scores_deletion_enhancers_and_suppressors_k[ + !is.na(interaction_scores_deletion_enhancers_and_suppressors_k$OrfRep), ] + interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_l <- + interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_l[ + !is.na(interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_l$OrfRep), ] + 
interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_l <- + interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_l[ + !is.na(interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_l$OrfRep), ] + interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_k <- + interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_k[ + !is.na(interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_k$OrfRep), ] + interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_k <- + interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_k[ + !is.na(interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_k$OrfRep), ] + write.csv(interaction_scores_deletion_enhancers_l, + file.path(out_dir, "zscores_interaction_deletion_enhancers_l.csv"), row.names = FALSE) + write.csv(interaction_scores_deletion_enhancers_k, + file.path(out_dir, "zscores_interaction_deletion_enhancers_k.csv"), row.names = FALSE) + write.csv(interaction_scores_deletion_suppressors_l, + file.path(out_dir, "zscores_interaction_deletion_suppressors_l.csv"), row.names = FALSE) + write.csv(interaction_scores_deletion_suppressors_k, + file.path(out_dir, "zscores_interaction_deletion_suppressors_k.csv"), row.names = FALSE) + write.csv(interaction_scores_deletion_enhancers_and_suppressors_l, + file.path(out_dir, "zscores_interaction_deletion_enhancers_and_suppressors_l.csv"), row.names = FALSE) + write.csv(interaction_scores_deletion_enhancers_and_suppressors_k, + file.path(out_dir, "zscores_interaction_deletion_enhancers_and_suppressors_k.csv"), row.names = FALSE) + write.csv(interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_l, + file.path(out_dir, "zscores_interaction_suppressors_and_lm_enhancers_l.csv"), row.names = FALSE) + write.csv(interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_l, + file.path(out_dir, "zscores_interaction_enhancers_and_lm_suppressors_l.csv"), row.names = FALSE) + 
write.csv(interaction_scores_deletion_enhancers_lm_suppressors_avg_zscore_k, + file.path(out_dir, "zscores_interaction_suppressors_and_lm_enhancers_k.csv"), row.names = FALSE) + write.csv(interaction_scores_deletion_enhancers_avg_zscore_suppressors_lm_k, + file.path(out_dir, "zscores_interaction_enhancers_and_lm_suppressors_k.csv"), row.names = FALSE) # Get enhancers and suppressors for linear regression - InteractionScores_deletion_enhancers_L_lm <- - InteractionScores[InteractionScores$Z_lm_L >= 2, ] - InteractionScores_deletion_enhancers_K_lm <- - InteractionScores[InteractionScores$Z_lm_K <= -2, ] - InteractionScores_deletion_suppressors_L_lm <- - InteractionScores[InteractionScores$Z_lm_L <= -2, ] - InteractionScores_deletion_suppressors_K_lm <- - InteractionScores[InteractionScores$Z_lm_K >= 2, ] - InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- - InteractionScores[InteractionScores$Z_lm_L >= 2 | InteractionScores$Z_lm_L <= -2, ] - InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- - InteractionScores[InteractionScores$Z_lm_K >= 2 | InteractionScores$Z_lm_K <= -2, ] - InteractionScores_deletion_enhancers_L_lm <- - InteractionScores_deletion_enhancers_L_lm[ - !is.na(InteractionScores_deletion_enhancers_L_lm$OrfRep), ] - InteractionScores_deletion_enhancers_K_lm <- - InteractionScores_deletion_enhancers_K_lm[ - !is.na(InteractionScores_deletion_enhancers_K_lm$OrfRep), ] - InteractionScores_deletion_suppressors_L_lm <- - InteractionScores_deletion_suppressors_L_lm[ - !is.na(InteractionScores_deletion_suppressors_L_lm$OrfRep), ] - InteractionScores_deletion_suppressors_K_lm <- - InteractionScores_deletion_suppressors_K_lm[ - !is.na(InteractionScores_deletion_suppressors_K_lm$OrfRep), ] - InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- - InteractionScores_deletion_enhancers_and_Suppressors_L_lm[ - !is.na(InteractionScores_deletion_enhancers_and_Suppressors_L_lm$OrfRep), ] - 
InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- - InteractionScores_deletion_enhancers_and_Suppressors_K_lm[ - !is.na(InteractionScores_deletion_enhancers_and_Suppressors_K_lm$OrfRep), ] + interaction_scores_deletion_enhancers_l_lm <- + interaction_scores[interaction_scores$z_lm_l >= 2, ] + interaction_scores_deletion_enhancers_k_lm <- + interaction_scores[interaction_scores$z_lm_k <= -2, ] + interaction_scores_deletion_suppressors_l_lm <- + interaction_scores[interaction_scores$z_lm_l <= -2, ] + interaction_scores_deletion_suppressors_k_lm <- + interaction_scores[interaction_scores$z_lm_k >= 2, ] + interaction_scores_deletion_enhancers_and_suppressors_l_lm <- + interaction_scores[interaction_scores$z_lm_l >= 2 | interaction_scores$z_lm_l <= -2, ] + interaction_scores_deletion_enhancers_and_suppressors_k_lm <- + interaction_scores[interaction_scores$z_lm_k >= 2 | interaction_scores$z_lm_k <= -2, ] + interaction_scores_deletion_enhancers_l_lm <- + interaction_scores_deletion_enhancers_l_lm[ + !is.na(interaction_scores_deletion_enhancers_l_lm$OrfRep), ] + interaction_scores_deletion_enhancers_k_lm <- + interaction_scores_deletion_enhancers_k_lm[ + !is.na(interaction_scores_deletion_enhancers_k_lm$OrfRep), ] + interaction_scores_deletion_suppressors_l_lm <- + interaction_scores_deletion_suppressors_l_lm[ + !is.na(interaction_scores_deletion_suppressors_l_lm$OrfRep), ] + interaction_scores_deletion_suppressors_k_lm <- + interaction_scores_deletion_suppressors_k_lm[ + !is.na(interaction_scores_deletion_suppressors_k_lm$OrfRep), ] + interaction_scores_deletion_enhancers_and_suppressors_l_lm <- + interaction_scores_deletion_enhancers_and_suppressors_l_lm[ + !is.na(interaction_scores_deletion_enhancers_and_suppressors_l_lm$OrfRep), ] + interaction_scores_deletion_enhancers_and_suppressors_k_lm <- + interaction_scores_deletion_enhancers_and_suppressors_k_lm[ + !is.na(interaction_scores_deletion_enhancers_and_suppressors_k_lm$OrfRep), ] - 
write.csv(InteractionScores_deletion_enhancers_L_lm, - file.path(outDir, "ZScores_Interaction_DeletionEnhancers_L_lm.csv"), row.names = FALSE) - write.csv(InteractionScores_deletion_enhancers_K_lm, - file.path(outDir, "ZScores_Interaction_DeletionEnhancers_K_lm.csv"), row.names = FALSE) - write.csv(InteractionScores_deletion_suppressors_L_lm, - file.path(outDir, "ZScores_Interaction_DeletionSuppressors_L_lm.csv"), row.names = FALSE) - write.csv(InteractionScores_deletion_suppressors_K_lm, - file.path(outDir, "ZScores_Interaction_DeletionSuppressors_K_lm.csv"), row.names = FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L_lm, - file.path(outDir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv"), row.names = FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K_lm, - file.path(outDir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv"), row.names = FALSE) - # write.csv(Labels, studyInfo, row.names = FALSE) + write.csv(interaction_scores_deletion_enhancers_l_lm, + file.path(out_dir, "zscores_interaction_deletion_enhancers_l_lm.csv"), row.names = FALSE) + write.csv(interaction_scores_deletion_enhancers_k_lm, + file.path(out_dir, "zscores_interaction_deletion_enhancers_k_lm.csv"), row.names = FALSE) + write.csv(interaction_scores_deletion_suppressors_l_lm, + file.path(out_dir, "zscores_interaction_deletion_suppressors_l_lm.csv"), row.names = FALSE) + write.csv(interaction_scores_deletion_suppressors_k_lm, + file.path(out_dir, "zscores_interaction_deletion_suppressors_k_lm.csv"), row.names = FALSE) + write.csv(interaction_scores_deletion_enhancers_and_suppressors_l_lm, + file.path(out_dir, "zscores_interaction_deletion_enhancers_and_suppressors_l_lm.csv"), row.names = FALSE) + write.csv(interaction_scores_deletion_enhancers_and_suppressors_k_lm, + file.path(out_dir, "zscores_interaction_deletion_enhancers_and_suppressors_k_lm.csv"), row.names = FALSE) + # write.csv(Labels, study_info_file, 
row.names = FALSE) # write.table(Labels, file.path("../Code/StudyInfo.txt"), sep = "\t", row.names = FALSE) for (i in 1:num_genes) { - Gene_Sel <- unique(InteractionScores$OrfRep)[i] - X_ZCalculations <- X_stats_interaction_ALL[X_stats_interaction_ALL$OrfRep == Gene_Sel, ] - X_Int_Scores <- InteractionScores[InteractionScores$OrfRep == Gene_Sel, ] + gene_sel <- unique(interaction_scores$OrfRep)[i] + x_z_calculations <- x_stats_interaction_all[x_stats_interaction_all$OrfRep == gene_sel, ] + df_int_scores <- interaction_scores[interaction_scores$OrfRep == gene_sel, ] - p_l[[i]] <- ggplot(X_ZCalculations, aes(Conc_Num_Factor, Delta_L)) + + p_l[[i]] <- ggplot(x_z_calculations, aes(conc_num_factor, delta_l)) + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + coord_cartesian(ylim = c(-65, 65)) + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_l), ymax = 0 + (2 * WT_sd_l)), alpha = 0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1], X_ZCalculations$Gene[1], sep = " ")) + - annotate("text", x = 1, y = 45, label = paste("ZShift =", round(X_Int_Scores$Z_Shift_L, 2))) + + ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) + + annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$z_shift_l, 2))) + scale_color_discrete(guide = FALSE) + - # annotate("text", x = 1, y = 35, label = paste("Avg ZScore =", round(X_Int_Scores$Avg_Zscore_L, 2))) + - annotate("text", x = 1, y = 25, label = paste("Z lm Score =", round(X_Int_Scores$Z_lm_L, 2))) + - annotate("text", x = 1, y = -25, label = paste("NG =", X_Int_Scores$NG)) + - annotate("text", x = 1, y = -35, label = paste("DB =", X_Int_Scores$DB)) + - annotate("text", x = 1, y = -45, label = paste("SM =", X_Int_Scores$SM)) + + # annotate("text", x = 1, y = 35, label = paste("Avg ZScore =", round(df_int_scores$avg_zscore_l, 2))) + + annotate("text", x = 1, y = 25, label = paste("Z lm Score =", round(df_int_scores$z_lm_l, 2))) + + annotate("text", x = 1, y = -25, label = 
paste("NG =", df_int_scores$NG)) + + annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) + + annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) + scale_x_continuous( - name = unique(X$Drug[1]), - breaks = unique(X_ZCalculations$Conc_Num_Factor), - labels = unique(as.character(X_ZCalculations$Conc_Num))) + + name = unique(df$Drug[1]), + breaks = unique(x_z_calculations$conc_num_factor), + labels = unique(as.character(x_z_calculations$conc_num))) + scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) + theme_publication() - p_K[[i]] <- ggplot(X_ZCalculations, aes(Conc_Num_Factor, Delta_K)) + + p_k[[i]] <- ggplot(x_z_calculations, aes(conc_num_factor, delta_k)) + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + coord_cartesian(ylim = c(-65, 65)) + - geom_errorbar(aes(ymin = 0 - (2 * WT_sd_K), ymax = 0 + (2 * WT_sd_K)), alpha = 0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1], X_ZCalculations$Gene[1], sep = " ")) + - annotate("text", x = 1, y = 45, label = paste("ZShift =", round(X_Int_Scores$Z_Shift_K, 2))) + + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_k), ymax = 0 + (2 * WT_sd_k)), alpha = 0.3) + + ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) + + annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$z_shift_k, 2))) + scale_color_discrete(guide = FALSE) + - # annotate("text", x = 1, y = 35, label = paste("Avg ZScore =", round(X_Int_Scores$Avg_Zscore_K, 2))) + - annotate("text", x = 1, y = 25, label = paste("Z lm Score =", round(X_Int_Scores$Z_lm_K, 2))) + - annotate("text", x = 1, y = -25, label = paste("NG =", X_Int_Scores$NG)) + - annotate("text", x = 1, y = -35, label = paste("DB =", X_Int_Scores$DB)) + - annotate("text", x = 1, y = -45, label = paste("SM =", X_Int_Scores$SM)) + + # annotate("text", x = 1, y = 35, label = paste("Avg ZScore =", round(df_int_scores$avg_zscore_k, 2))) + + annotate("text", x = 
1, y = 25, label = paste("Z lm Score =", round(df_int_scores$z_lm_k, 2))) + + annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) + + annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) + + annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) + scale_x_continuous( - name = unique(X$Drug[1]), - breaks = unique(X_ZCalculations$Conc_Num_Factor), - labels = unique(as.character(X_ZCalculations$Conc_Num))) + + name = unique(df$Drug[1]), + breaks = unique(x_z_calculations$conc_num_factor), + labels = unique(as.character(x_z_calculations$conc_num))) + scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) + theme_publication() - p_r[[i]] <- ggplot(X_ZCalculations, aes(Conc_Num_Factor, Delta_r)) + + p_r[[i]] <- ggplot(x_z_calculations, aes(conc_num_factor, delta_r)) + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + coord_cartesian(ylim = c(-0.65, 0.65)) + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_r), ymax = 0 + (2 * WT_sd_r)), alpha = 0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1], X_ZCalculations$Gene[1], sep = " ")) + - annotate("text", x = 1, y = 0.45, label = paste("ZShift =", round(X_Int_Scores$Z_Shift_r, 2))) + + ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) + + annotate("text", x = 1, y = 0.45, label = paste("ZShift =", round(df_int_scores$z_shift_r, 2))) + scale_color_discrete(guide = FALSE) + - # annotate("text", x = 1, y = 0.35, label = paste("Avg ZScore =", round(X_Int_Scores$Avg_Zscore_r, 2))) + - annotate("text", x = 1, y = 0.25, label = paste("Z lm Score =", round(X_Int_Scores$Z_lm_r, 2))) + - annotate("text", x = 1, y = -0.25, label = paste("NG =", X_Int_Scores$NG)) + - annotate("text", x = 1, y = -0.35, label = paste("DB =", X_Int_Scores$DB)) + - annotate("text", x = 1, y = -0.45, label = paste("SM =", X_Int_Scores$SM)) + + # annotate("text", x = 1, y = 0.35, label = paste("Avg ZScore =", 
round(df_int_scores$avg_zscore_r, 2))) + + annotate("text", x = 1, y = 0.25, label = paste("Z lm Score =", round(df_int_scores$z_lm_r, 2))) + + annotate("text", x = 1, y = -0.25, label = paste("NG =", df_int_scores$NG)) + + annotate("text", x = 1, y = -0.35, label = paste("DB =", df_int_scores$DB)) + + annotate("text", x = 1, y = -0.45, label = paste("SM =", df_int_scores$SM)) + scale_x_continuous( - name = unique(X$Drug[1]), - breaks = unique(X_ZCalculations$Conc_Num_Factor), - labels = unique(as.character(X_ZCalculations$Conc_Num))) + + name = unique(df$Drug[1]), + breaks = unique(x_z_calculations$conc_num_factor), + labels = unique(as.character(x_z_calculations$conc_num))) + scale_y_continuous(breaks = c(-0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6)) + theme_publication() - p_AUC[[i]] <- ggplot(X_ZCalculations, aes(Conc_Num_Factor, Delta_AUC)) + + p_auc[[i]] <- ggplot(x_z_calculations, aes(conc_num_factor, delta_auc)) + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + coord_cartesian(ylim = c(-6500, 6500)) + - geom_errorbar(aes(ymin = 0 - (2 * WT_sd_AUC), ymax = 0 + (2 * WT_sd_AUC)), alpha = 0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1], X_ZCalculations$Gene[1], sep = " ")) + - annotate("text", x = 1, y = 4500, label = paste("ZShift =", round(X_Int_Scores$Z_Shift_AUC, 2))) + + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_auc), ymax = 0 + (2 * WT_sd_auc)), alpha = 0.3) + + ggtitle(paste(x_z_calculations$OrfRep[1], x_z_calculations$Gene[1], sep = " ")) + + annotate("text", x = 1, y = 4500, label = paste("ZShift =", round(df_int_scores$z_shift_auc, 2))) + scale_color_discrete(guide = FALSE) + - # annotate("text", x = 1, y = 3500, label = paste("Avg ZScore =", round(X_Int_Scores$Avg_Zscore_AUC, 2))) + - annotate("text", x = 1, y = 2500, label = paste("Z lm Score =", round(X_Int_Scores$Z_lm_AUC, 2))) + - annotate("text", x = 1, y = -2500, label = paste("NG =", X_Int_Scores$NG)) + - annotate("text", x = 1, y = -3500, label = paste("DB =", X_Int_Scores$DB)) 
+ - annotate("text", x = 1, y = -4500, label = paste("SM =", X_Int_Scores$SM)) + + # annotate("text", x = 1, y = 3500, label = paste("Avg ZScore =", round(df_int_scores$avg_zscore_auc, 2))) + + annotate("text", x = 1, y = 2500, label = paste("Z lm Score =", round(df_int_scores$z_lm_auc, 2))) + + annotate("text", x = 1, y = -2500, label = paste("NG =", df_int_scores$NG)) + + annotate("text", x = 1, y = -3500, label = paste("DB =", df_int_scores$DB)) + + annotate("text", x = 1, y = -4500, label = paste("SM =", df_int_scores$SM)) + scale_x_continuous( - name = unique(X$Drug[1]), - breaks = unique(X_ZCalculations$Conc_Num_Factor), - labels = unique(as.character(X_ZCalculations$Conc_Num))) + + name = unique(df$Drug[1]), + breaks = unique(x_z_calculations$conc_num_factor), + labels = unique(as.character(x_z_calculations$conc_num))) + scale_y_continuous(breaks = c(-6000, -5000, -4000, -3000, -2000, -1000, 0, 1000, 2000, 3000, 4000, 5000, 6000)) + theme_publication() if (i == 1) { - X_stats_interaction_ALL_final <- X_ZCalculations + x_stats_interaction_all_final <- x_z_calculations } if (i > 1) { - X_stats_interaction_ALL_final <- rbind(X_stats_interaction_ALL_final, X_ZCalculations) + x_stats_interaction_all_final <- rbind(x_stats_interaction_all_final, x_z_calculations) } } print("Pass Int ggplot loop") - write.csv(X_stats_interaction_ALL_final, file.path(outDir, "ZScore_Calculations.csv"), row.names = FALSE) + write.csv(x_stats_interaction_all_final, file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE) - Blank <- ggplot(X2_RF) + geom_blank() + blank <- ggplot(df2_rf) + geom_blank() - pdf(file.path(outDir, "InteractionPlots.pdf"), width = 16, height = 16, onefile = TRUE) + pdf(file.path(out_dir, "interaction_plots.pdf"), width = 16, height = 16, onefile = TRUE) - X_stats_X2_RF <- ddply( - X2_RF, - c("Conc_Num", "Conc_Num_Factor"), + x_stats_df2_rf <- ddply( + df2_rf, + c("conc_num", "conc_num_factor"), summarise, - mean_L = mean(l, na.rm = TRUE), - median_L 
= median(l, na.rm = TRUE), - max_L = max(l, na.rm = TRUE), - min_L = min(l, na.rm = TRUE), - sd_L = sd(l, na.rm = TRUE), - mean_K = mean(K, na.rm = TRUE), - median_K = median(K, na.rm = TRUE), - max_K = max(K, na.rm = TRUE), - min_K = min(K, na.rm = TRUE), - sd_K = sd(K, na.rm = TRUE), + mean_l = mean(l, na.rm = TRUE), + median_l = median(l, na.rm = TRUE), + max_l = max(l, na.rm = TRUE), + min_l = min(l, na.rm = TRUE), + sd_l = sd(l, na.rm = TRUE), + mean_k = mean(k, na.rm = TRUE), + median_k = median(k, na.rm = TRUE), + max_k = max(k, na.rm = TRUE), + min_k = min(k, na.rm = TRUE), + sd_k = sd(k, na.rm = TRUE), mean_r = mean(r, na.rm = TRUE), median_r = median(r, na.rm = TRUE), max_r = max(r, na.rm = TRUE), min_r = min(r, na.rm = TRUE), sd_r = sd(r, na.rm = TRUE), - mean_AUC = mean(AUC, na.rm = TRUE), - median_AUC = median(AUC, na.rm = TRUE), - max_AUC = max(AUC, na.rm = TRUE), - min_AUC = min(AUC, na.rm = TRUE), - sd_AUC = sd(AUC, na.rm = TRUE), + mean_auc = mean(auc, na.rm = TRUE), + median_auc = median(auc, na.rm = TRUE), + max_auc = max(auc, na.rm = TRUE), + min_auc = min(auc, na.rm = TRUE), + sd_auc = sd(auc, na.rm = TRUE), NG = sum(NG, na.rm = TRUE), DB = sum(DB, na.rm = TRUE), SM = sum(SM, na.rm = TRUE) ) - L_Stats <- ggplot(X2_RF, aes(Conc_Num_Factor, l)) + geom_point(position = "jitter", size = 1) + + l_stats <- ggplot(df2_rf, aes(conc_num_factor, l)) + geom_point(position = "jitter", size = 1) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), @@ -2160,20 +2121,20 @@ for (s in Background_Strains) { geom = "errorbar", color = "red") + stat_summary(fun = mean, geom = "point", color = "red") + scale_x_continuous( - name = unique(X$Drug[1]), - breaks = unique(X2_RF$Conc_Num_Factor), - labels = as.character(unique(X2_RF$Conc_Num)) + name = unique(df$Drug[1]), + breaks = unique(df2_rf$conc_num_factor), + labels = as.character(unique(df2_rf$conc_num)) ) + ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim = c(0, 160)) 
+ annotate("text", x = -0.25, y = 10, label = "NG") + annotate("text", x = -0.25, y = 5, label = "DB") + annotate("text", x = -0.25, y = 0, label = "SM") + - annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 10, label = X_stats_X2_RF$NG) + - annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 5, label = X_stats_X2_RF$DB) + - annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 0, label = X_stats_X2_RF$SM) + + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 10, label = x_stats_df2_rf$NG) + + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 5, label = x_stats_df2_rf$DB) + + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 0, label = x_stats_df2_rf$SM) + theme_publication() - K_Stats <- ggplot(X2_RF, aes(Conc_Num_Factor, K)) + geom_point(position = "jitter", size = 1) + + k_stats <- ggplot(df2_rf, aes(conc_num_factor, k)) + geom_point(position = "jitter", size = 1) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), @@ -2181,20 +2142,20 @@ for (s in Background_Strains) { geom = "errorbar", color = "red") + stat_summary(fun = mean, geom = "point", color = "red") + scale_x_continuous( - name = unique(X$Drug[1]), - breaks = unique(X2_RF$Conc_Num_Factor), - labels = as.character(unique(X2_RF$Conc_Num)) + name = unique(df$Drug[1]), + breaks = unique(df2_rf$conc_num_factor), + labels = as.character(unique(df2_rf$conc_num)) ) + ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim = c(-20, 160)) + annotate("text", x = -0.25, y = -5, label = "NG") + annotate("text", x = -0.25, y = -12.5, label = "DB") + annotate("text", x = -0.25, y = -20, label = "SM") + - annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = -5, label = X_stats_X2_RF$NG) + - annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = -12.5, label = X_stats_X2_RF$DB) + - annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = -20, label = X_stats_X2_RF$SM) + + annotate("text", x = 
c(unique(df2_rf$conc_num_factor)), y = -5, label = x_stats_df2_rf$NG) + + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = -12.5, label = x_stats_df2_rf$DB) + + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = -20, label = x_stats_df2_rf$SM) + theme_publication() - R_Stats <- ggplot(X2_RF, aes(Conc_Num_Factor, r)) + geom_point(position = "jitter", size = 1) + + r_stats <- ggplot(df2_rf, aes(conc_num_factor, r)) + geom_point(position = "jitter", size = 1) + stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), @@ -2202,20 +2163,20 @@ for (s in Background_Strains) { geom = "errorbar", color = "red") + stat_summary(fun = mean, geom = "point", color = "red") + scale_x_continuous( - name = unique(X$Drug[1]), - breaks = unique(X2_RF$Conc_Num_Factor), - labels = as.character(unique(X2_RF$Conc_Num)) + name = unique(df$Drug[1]), + breaks = unique(df2_rf$conc_num_factor), + labels = as.character(unique(df2_rf$conc_num)) ) + ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim = c(0, 1)) + annotate("text", x = -0.25, y = .9, label = "NG") + annotate("text", x = -0.25, y = .8, label = "DB") + annotate("text", x = -0.25, y = .7, label = "SM") + - annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = .9, label = X_stats_X2_RF$NG) + - annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = .8, label = X_stats_X2_RF$DB) + - annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = .7, label = X_stats_X2_RF$SM) + + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = .9, label = x_stats_df2_rf$NG) + + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = .8, label = x_stats_df2_rf$DB) + + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = .7, label = x_stats_df2_rf$SM) + theme_publication() - AUC_Stats <- ggplot(X2_RF, aes(Conc_Num_Factor, AUC)) + geom_point(position = "jitter", size = 1) + + auc_stats <- ggplot(df2_rf, aes(conc_num_factor, auc)) + geom_point(position = "jitter", size = 1) + 
stat_summary( fun = mean, fun.min = function(x) mean(x) - sd(x), @@ -2223,69 +2184,69 @@ for (s in Background_Strains) { geom = "errorbar", color = "red") + stat_summary(fun = mean, geom = "point", color = "red") + scale_x_continuous( - name = unique(X$Drug[1]), - breaks = unique(X2_RF$Conc_Num_Factor), - labels = as.character(unique(X2_RF$Conc_Num)) + name = unique(df$Drug[1]), + breaks = unique(df2_rf$conc_num_factor), + labels = as.character(unique(df2_rf$conc_num)) ) + - ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim = c(0, 12500)) + + ggtitle(paste(s, "Scatter RF for auc with SD", sep = " ")) + coord_cartesian(ylim = c(0, 12500)) + annotate("text", x = -0.25, y = 11000, label = "NG") + annotate("text", x = -0.25, y = 10000, label = "DB") + annotate("text", x = -0.25, y = 9000, label = "SM") + - annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 11000, label = X_stats_X2_RF$NG) + - annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 10000, label = X_stats_X2_RF$DB) + - annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 9000, label = X_stats_X2_RF$SM) + + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 11000, label = x_stats_df2_rf$NG) + + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 10000, label = x_stats_df2_rf$DB) + + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 9000, label = x_stats_df2_rf$SM) + theme_publication() - L_Stats_Box <- ggplot(X2_RF, aes(as.factor(Conc_Num_Factor), l)) + + l_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), l)) + geom_boxplot() + scale_x_discrete( - name = unique(X$Drug[1]), - breaks = unique(X2_RF$Conc_Num_Factor), - labels = as.character(unique(X2_RF$Conc_Num)) + name = unique(df$Drug[1]), + breaks = unique(df2_rf$conc_num_factor), + labels = as.character(unique(df2_rf$conc_num)) ) + ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim = c(0, 160)) + theme_publication() - K_Stats_Box <- 
ggplot(X2_RF, aes(as.factor(Conc_Num_Factor), K)) + + k_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), k)) + geom_boxplot() + scale_x_discrete( - name = unique(X$Drug[1]), - breaks = unique(X2_RF$Conc_Num_Factor), - labels = as.character(unique(X2_RF$Conc_Num)) + name = unique(df$Drug[1]), + breaks = unique(df2_rf$conc_num_factor), + labels = as.character(unique(df2_rf$conc_num)) ) + ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim = c(0, 130)) + theme_publication() - r_Stats_Box <- ggplot(X2_RF, aes(as.factor(Conc_Num_Factor), r)) + + r_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), r)) + geom_boxplot() + scale_x_discrete( - name = unique(X$Drug[1]), - breaks = unique(X2_RF$Conc_Num_Factor), - labels = as.character(unique(X2_RF$Conc_Num)) + name = unique(df$Drug[1]), + breaks = unique(df2_rf$conc_num_factor), + labels = as.character(unique(df2_rf$conc_num)) ) + ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim = c(0, 1)) + theme_publication() - AUC_Stats_Box <- ggplot(X2_RF, aes(as.factor(Conc_Num_Factor), AUC)) + + auc_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), auc)) + geom_boxplot() + scale_x_discrete( - name = unique(X$Drug[1]), - breaks = unique(X2_RF$Conc_Num_Factor), - labels = as.character(unique(X2_RF$Conc_Num)) + name = unique(df$Drug[1]), + breaks = unique(df2_rf$conc_num_factor), + labels = as.character(unique(df2_rf$conc_num)) ) + - ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) + + ggtitle(paste(s, "Scatter RF for auc with SD", sep = " ")) + coord_cartesian(ylim = c(0, 12500)) + theme_publication() - grid.arrange(L_Stats, K_Stats, R_Stats, AUC_Stats, ncol = 2, nrow = 2) - grid.arrange(L_Stats_Box, K_Stats_Box, r_Stats_Box, AUC_Stats_Box, ncol = 2, nrow = 2) + grid.arrange(l_stats, k_stats, r_stats, auc_stats, ncol = 2, nrow = 2) + grid.arrange(l_stats_box, k_stats_box, r_stats_box, auc_stats_box, ncol = 2, nrow = 2) # Plot the references 
- # grid.arrange(p3, p3_K, p3_r, p4, p4_K, p4_r, p5, p5_K, p5_r, p6, p6_K, p6_r, ncol = 3, nrow = 4) - # grid.arrange(p5, p5_K, p5_r, p6, p6_K, p6_r, ncol = 3, nrow = 2) + # grid.arrange(p3, p3_k, p3_r, p4, p4_k, p4_r, p5, p5_k, p5_r, p6, p6_k, p6_r, ncol = 3, nrow = 4) + # grid.arrange(p5, p5_k, p5_r, p6, p6_k, p6_r, ncol = 3, nrow = 2) # Loop for grid arrange 4x3 j <- rep(1, ((num_genes) / 3) - 1) @@ -2297,89 +2258,89 @@ for (s in Background_Strains) { num <- 0 for (m in 1:(round((num_genes) / 3) - 1)) { num <- j[m] - grid.arrange(p_l[[num]], p_K[[num]], p_r[[num]], p_AUC[[num]], - p_l[[num + 1]], p_K[[num + 1]], p_r[[num + 1]], p_AUC[[num + 1]], - p_l[[num + 2]], p_K[[num + 2]], p_r[[num + 2]], p_AUC[[num + 2]], ncol = 4, nrow = 3) - # grid.arrange(p_l[[364]], p_K[[364]], p_r[[364]], - # p_l[[365]], p_K[[365]], p_r[[365]], p_l[[366]], p_K[[366]], p_r[[366]], ncol = 3, nrow = 3) - # p1[[num + 3]], p_K[[num + 3]], p_r[[num + 3]], p1[[num + 4]], p_K[[num + 4]], p_r[[num + 4]] + grid.arrange(p_l[[num]], p_k[[num]], p_r[[num]], p_auc[[num]], + p_l[[num + 1]], p_k[[num + 1]], p_r[[num + 1]], p_auc[[num + 1]], + p_l[[num + 2]], p_k[[num + 2]], p_r[[num + 2]], p_auc[[num + 2]], ncol = 4, nrow = 3) + # grid.arrange(p_l[[364]], p_k[[364]], p_r[[364]], + # p_l[[365]], p_k[[365]], p_r[[365]], p_l[[366]], p_k[[366]], p_r[[366]], ncol = 3, nrow = 3) + # p1[[num + 3]], p_k[[num + 3]], p_r[[num + 3]], p1[[num + 4]], p_k[[num + 4]], p_r[[num + 4]] } if (num_genes != (num + 2)) { total_num <- num_genes - (num + 2) if (total_num == 5) { - grid.arrange(p_l[[num + 3]], p_K[[num + 3]], p_r[[num + 3]], p_AUC[[num + 3]], - p_l[[num + 4]], p_K[[num + 4]], p_r[[num + 4]], p_AUC[[num + 4]], - p_l[[num + 5]], p_K[[num + 5]], p_r[[num + 5]], p_AUC[[num + 5]], ncol = 4, nrow = 3) + grid.arrange(p_l[[num + 3]], p_k[[num + 3]], p_r[[num + 3]], p_auc[[num + 3]], + p_l[[num + 4]], p_k[[num + 4]], p_r[[num + 4]], p_auc[[num + 4]], + p_l[[num + 5]], p_k[[num + 5]], p_r[[num + 5]], p_auc[[num + 
5]], ncol = 4, nrow = 3) - grid.arrange(p_l[[num + 6]], p_K[[num + 6]], p_r[[num + 6]], p_AUC[[num + 6]], - p_l[[num + 7]], p_K[[num + 7]], p_r[[num + 7]], p_AUC[[num + 7]], - Blank, Blank, Blank, Blank, ncol = 4, nrow = 3) + grid.arrange(p_l[[num + 6]], p_k[[num + 6]], p_r[[num + 6]], p_auc[[num + 6]], + p_l[[num + 7]], p_k[[num + 7]], p_r[[num + 7]], p_auc[[num + 7]], + blank, blank, blank, blank, ncol = 4, nrow = 3) } if (total_num == 4) { - grid.arrange(p_l[[num + 3]], p_K[[num + 3]], p_r[[num + 3]], p_AUC[[num + 3]], - p_l[[num + 4]], p_K[[num + 4]], p_r[[num + 4]], p_AUC[[num + 4]], - p_l[[num + 5]], p_K[[num + 5]], p_r[[num + 5]], p_AUC[[num + 5]], ncol = 4, nrow = 3) + grid.arrange(p_l[[num + 3]], p_k[[num + 3]], p_r[[num + 3]], p_auc[[num + 3]], + p_l[[num + 4]], p_k[[num + 4]], p_r[[num + 4]], p_auc[[num + 4]], + p_l[[num + 5]], p_k[[num + 5]], p_r[[num + 5]], p_auc[[num + 5]], ncol = 4, nrow = 3) - grid.arrange(p_l[[num + 6]], p_K[[num + 6]], p_r[[num + 6]], p_AUC[[num + 6]], - Blank, Blank, Blank, Blank, Blank, Blank, Blank, Blank, ncol = 4, nrow = 3) + grid.arrange(p_l[[num + 6]], p_k[[num + 6]], p_r[[num + 6]], p_auc[[num + 6]], + blank, blank, blank, blank, blank, blank, blank, blank, ncol = 4, nrow = 3) } if (total_num == 3) { - grid.arrange(p_l[[num + 3]], p_K[[num + 3]], p_r[[num + 3]], p_AUC[[num + 3]], - p_l[[num + 4]], p_K[[num + 4]], p_r[[num + 4]], p_AUC[[num + 4]], - p_l[[num + 5]], p_K[[num + 5]], p_r[[num + 5]], p_AUC[[num + 5]], ncol = 4, nrow = 3) - # grid.arrange(p_l[[num + 6]], p_K[[num + 6]], p_r[[num + 6]], - # p_l[[num + 7]], p_K[[num + 7]], p_r[[num + 7]], Blank, Blank, Blank, ncol = 3, nrow = 3) + grid.arrange(p_l[[num + 3]], p_k[[num + 3]], p_r[[num + 3]], p_auc[[num + 3]], + p_l[[num + 4]], p_k[[num + 4]], p_r[[num + 4]], p_auc[[num + 4]], + p_l[[num + 5]], p_k[[num + 5]], p_r[[num + 5]], p_auc[[num + 5]], ncol = 4, nrow = 3) + # grid.arrange(p_l[[num + 6]], p_k[[num + 6]], p_r[[num + 6]], + # p_l[[num + 7]], p_k[[num + 7]], 
p_r[[num + 7]], blank, blank, blank, ncol = 3, nrow = 3) } if (total_num == 2) { - grid.arrange(p_l[[num + 3]], p_K[[num + 3]], p_r[[num + 3]], p_AUC[[num + 3]], - p_l[[num + 4]], p_K[[num + 4]], p_r[[num + 4]], p_AUC[[num + 4]], - Blank, Blank, Blank, Blank, ncol = 4, nrow = 3) - # grid.arrange(p_l[[num + 6]], p_K[[num + 6]], p_r[[num + 6]], - # p_l[[num + 7]], p_K[[num + 7]], p_r[[num + 7]], Blank, Blank, Blank, ncol = 3, nrow = 3) + grid.arrange(p_l[[num + 3]], p_k[[num + 3]], p_r[[num + 3]], p_auc[[num + 3]], + p_l[[num + 4]], p_k[[num + 4]], p_r[[num + 4]], p_auc[[num + 4]], + blank, blank, blank, blank, ncol = 4, nrow = 3) + # grid.arrange(p_l[[num + 6]], p_k[[num + 6]], p_r[[num + 6]], + # p_l[[num + 7]], p_k[[num + 7]], p_r[[num + 7]], blank, blank, blank, ncol = 3, nrow = 3) } if (total_num == 1) { - grid.arrange(p_l[[num + 3]], p_K[[num + 3]], p_r[[num + 3]], p_AUC[[num + 3]], - Blank, Blank, Blank, Blank, Blank, Blank, Blank, Blank, ncol = 4, nrow = 3) - # grid.arrange(p_l[[num + 6]], p_K[[num + 6]], p_r[[num + 6]], - # p_l[[num + 7]], p_K[[num + 7]], p_r[[num + 7]], Blank, Blank, Blank, ncol = 3, nrow = 3) + grid.arrange(p_l[[num + 3]], p_k[[num + 3]], p_r[[num + 3]], p_auc[[num + 3]], + blank, blank, blank, blank, blank, blank, blank, blank, ncol = 4, nrow = 3) + # grid.arrange(p_l[[num + 6]], p_k[[num + 6]], p_r[[num + 6]], + # p_l[[num + 7]], p_k[[num + 7]], p_r[[num + 7]], blank, blank, blank, ncol = 3, nrow = 3) } } dev.off() - pdf(file.path(outDir, "RF_InteractionPlots.pdf"), width = 16, height = 16, onefile = TRUE) + pdf(file.path(out_dir, "rf_interaction_plots.pdf"), width = 16, height = 16, onefile = TRUE) - X_stats_X2_RF <- ddply( - X2_RF, - c("Conc_Num", "Conc_Num_Factor"), + x_stats_df2_rf <- ddply( + df2_rf, + c("conc_num", "conc_num_factor"), summarise, - mean_L = mean(l, na.rm = TRUE), - median_L = median(l, na.rm = TRUE), - max_L = max(l, na.rm = TRUE), - min_L = min(l, na.rm = TRUE), - sd_L = sd(l, na.rm = TRUE), - mean_K = mean(K, 
na.rm = TRUE), - median_K = median(K, na.rm = TRUE), - max_K = max(K, na.rm = TRUE), - min_K = min(K, na.rm = TRUE), - sd_K = sd(K, na.rm = TRUE), + mean_l = mean(l, na.rm = TRUE), + median_l = median(l, na.rm = TRUE), + max_l = max(l, na.rm = TRUE), + min_l = min(l, na.rm = TRUE), + sd_l = sd(l, na.rm = TRUE), + mean_k = mean(k, na.rm = TRUE), + median_k = median(k, na.rm = TRUE), + max_k = max(k, na.rm = TRUE), + min_k = min(k, na.rm = TRUE), + sd_k = sd(k, na.rm = TRUE), mean_r = mean(r, na.rm = TRUE), median_r = median(r, na.rm = TRUE), max_r = max(r, na.rm = TRUE), min_r = min(r, na.rm = TRUE), sd_r = sd(r, na.rm = TRUE), - mean_AUC = mean(AUC, na.rm = TRUE), - median_AUC = median(AUC, na.rm = TRUE), - max_AUC = max(AUC, na.rm = TRUE), - min_AUC = min(AUC, na.rm = TRUE), - sd_AUC = sd(AUC, na.rm = TRUE), + mean_auc = mean(auc, na.rm = TRUE), + median_auc = median(auc, na.rm = TRUE), + max_auc = max(auc, na.rm = TRUE), + min_auc = min(auc, na.rm = TRUE), + sd_auc = sd(auc, na.rm = TRUE), NG = sum(NG, na.rm = TRUE), DB = sum(DB, na.rm = TRUE), SM = sum(SM, na.rm = TRUE) ) - L_Stats <- ggplot(X2_RF, aes(Conc_Num_Factor, l)) + + l_stats <- ggplot(df2_rf, aes(conc_num_factor, l)) + geom_point(position = "jitter", size = 1) + stat_summary( fun = mean, @@ -2389,21 +2350,21 @@ for (s in Background_Strains) { ) + stat_summary(fun = mean, geom = "point", color = "red") + scale_x_continuous( - name = unique(X$Drug[1]), - breaks = unique(X2_RF$Conc_Num_Factor), - labels = as.character(unique(X2_RF$Conc_Num)) + name = unique(df$Drug[1]), + breaks = unique(df2_rf$conc_num_factor), + labels = as.character(unique(df2_rf$conc_num)) ) + ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim = c(0, 130)) + annotate("text", x = -0.25, y = 10, label = "NG") + annotate("text", x = -0.25, y = 5, label = "DB") + annotate("text", x = -0.25, y = 0, label = "SM") + - annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 10, label = X_stats_X2_RF$NG) + - 
annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 5, label = X_stats_X2_RF$DB) + - annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 0, label = X_stats_X2_RF$SM) + + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 10, label = x_stats_df2_rf$NG) + + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 5, label = x_stats_df2_rf$DB) + + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 0, label = x_stats_df2_rf$SM) + theme_publication() - K_Stats <- ggplot(X2_RF, aes(Conc_Num_Factor, K)) + + k_stats <- ggplot(df2_rf, aes(conc_num_factor, k)) + geom_point(position = "jitter", size = 1) + stat_summary( fun = mean, @@ -2413,21 +2374,21 @@ for (s in Background_Strains) { ) + stat_summary(fun = mean, geom = "point", color = "red") + scale_x_continuous( - name = unique(X$Drug[1]), - breaks = unique(X2_RF$Conc_Num_Factor), - labels = as.character(unique(X2_RF$Conc_Num)) + name = unique(df$Drug[1]), + breaks = unique(df2_rf$conc_num_factor), + labels = as.character(unique(df2_rf$conc_num)) ) + ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim = c(-20, 160)) + annotate("text", x = -0.25, y = -5, label = "NG") + annotate("text", x = -0.25, y = -12.5, label = "DB") + annotate("text", x = -0.25, y = -20, label = "SM") + - annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = -5, label = X_stats_X2_RF$NG) + - annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = -12.5, label = X_stats_X2_RF$DB) + - annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = -20, label = X_stats_X2_RF$SM) + + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = -5, label = x_stats_df2_rf$NG) + + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = -12.5, label = x_stats_df2_rf$DB) + + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = -20, label = x_stats_df2_rf$SM) + theme_publication() - R_Stats <- ggplot(X2_RF, aes(Conc_Num_Factor, r)) + + r_stats <- ggplot(df2_rf, aes(conc_num_factor, 
r)) + geom_point(position = "jitter", size = 1) + stat_summary( fun = mean, @@ -2437,21 +2398,21 @@ for (s in Background_Strains) { ) + stat_summary(fun = mean, geom = "point", color = "red") + scale_x_continuous( - name = unique(X$Drug[1]), - breaks = unique(X2_RF$Conc_Num_Factor), - labels = as.character(unique(X2_RF$Conc_Num)) + name = unique(df$Drug[1]), + breaks = unique(df2_rf$conc_num_factor), + labels = as.character(unique(df2_rf$conc_num)) ) + ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim = c(0, 1)) + annotate("text", x = -0.25, y = .9, label = "NG") + annotate("text", x = -0.25, y = .8, label = "DB") + annotate("text", x = -0.25, y = .7, label = "SM") + - annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = .9, label = X_stats_X2_RF$NG) + - annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = .8, label = X_stats_X2_RF$DB) + - annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = .7, label = X_stats_X2_RF$SM) + + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = .9, label = x_stats_df2_rf$NG) + + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = .8, label = x_stats_df2_rf$DB) + + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = .7, label = x_stats_df2_rf$SM) + theme_publication() - AUC_Stats <- ggplot(X2_RF, aes(Conc_Num_Factor, AUC)) + + auc_stats <- ggplot(df2_rf, aes(conc_num_factor, auc)) + geom_point(position = "jitter", size = 1) + stat_summary( fun = mean, @@ -2461,802 +2422,802 @@ for (s in Background_Strains) { ) + stat_summary(fun = mean, geom = "point", color = "red") + scale_x_continuous( - name = unique(X$Drug[1]), - breaks = unique(X2_RF$Conc_Num_Factor), - labels = as.character(unique(X2_RF$Conc_Num)) + name = unique(df$Drug[1]), + breaks = unique(df2_rf$conc_num_factor), + labels = as.character(unique(df2_rf$conc_num)) ) + - ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) + + ggtitle(paste(s, "Scatter RF for auc with SD", sep = " ")) + 
coord_cartesian(ylim = c(0, 12500)) + annotate("text", x = -0.25, y = 11000, label = "NG") + annotate("text", x = -0.25, y = 10000, label = "DB") + annotate("text", x = -0.25, y = 9000, label = "SM") + - annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 11000, label = X_stats_X2_RF$NG) + - annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 10000, label = X_stats_X2_RF$DB) + - annotate("text", x = c(unique(X2_RF$Conc_Num_Factor)), y = 9000, label = X_stats_X2_RF$SM) + + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 11000, label = x_stats_df2_rf$NG) + + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 10000, label = x_stats_df2_rf$DB) + + annotate("text", x = c(unique(df2_rf$conc_num_factor)), y = 9000, label = x_stats_df2_rf$SM) + theme_publication() - L_Stats_Box <- ggplot(X2_RF, aes(as.factor(Conc_Num_Factor), l)) + + l_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), l)) + geom_boxplot() + scale_x_discrete( - name = unique(X$Drug[1]), - breaks = unique(X2_RF$Conc_Num_Factor), - labels = as.character(unique(X2_RF$Conc_Num)) + name = unique(df$Drug[1]), + breaks = unique(df2_rf$conc_num_factor), + labels = as.character(unique(df2_rf$conc_num)) ) + ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim = c(0, 130)) + theme_publication() - K_Stats_Box <- ggplot(X2_RF, aes(as.factor(Conc_Num_Factor), K)) + + k_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), k)) + geom_boxplot() + scale_x_discrete( - name = unique(X$Drug[1]), - breaks = unique(X2_RF$Conc_Num_Factor), - labels = as.character(unique(X2_RF$Conc_Num)) + name = unique(df$Drug[1]), + breaks = unique(df2_rf$conc_num_factor), + labels = as.character(unique(df2_rf$conc_num)) ) + ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim = c(0, 160)) + theme_publication() - r_Stats_Box <- ggplot(X2_RF, aes(as.factor(Conc_Num_Factor), r)) + + r_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), 
r)) + geom_boxplot() + scale_x_discrete( - name = unique(X$Drug[1]), - breaks = unique(X2_RF$Conc_Num_Factor), - labels = as.character(unique(X2_RF$Conc_Num)) + name = unique(df$Drug[1]), + breaks = unique(df2_rf$conc_num_factor), + labels = as.character(unique(df2_rf$conc_num)) ) + ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim = c(0, 1)) + theme_publication() - AUC_Stats_Box <- ggplot(X2_RF, aes(as.factor(Conc_Num_Factor), AUC)) + + auc_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), auc)) + geom_boxplot() + scale_x_discrete( - name = unique(X$Drug[1]), - breaks = unique(X2_RF$Conc_Num_Factor), - labels = as.character(unique(X2_RF$Conc_Num)) + name = unique(df$Drug[1]), + breaks = unique(df2_rf$conc_num_factor), + labels = as.character(unique(df2_rf$conc_num)) ) + - ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim = c(12000, 0)) + + ggtitle(paste(s, "Scatter RF for auc with SD", sep = " ")) + coord_cartesian(ylim = c(12000, 0)) + theme_publication() - grid.arrange(L_Stats, K_Stats, R_Stats, AUC_Stats, ncol = 2, nrow = 2) - grid.arrange(L_Stats_Box, K_Stats_Box, r_Stats_Box, AUC_Stats_Box, ncol = 2, nrow = 2) + grid.arrange(l_stats, k_stats, r_stats, auc_stats, ncol = 2, nrow = 2) + grid.arrange(l_stats_box, k_stats_box, r_stats_box, auc_stats_box, ncol = 2, nrow = 2) # Plot the references - # grid.arrange(p3, p3_K, p3_r, p4, p4_K, p4_r, p5, p5_K, p5_r, p6, p6_K, p6_r, ncol = 3, nrow = 4) - # grid.arrange(p5, p5_K, p5_r, p6, p6_K, p6_r, ncol = 3, nrow = 2) + # grid.arrange(p3, p3_k, p3_r, p4, p4_k, p4_r, p5, p5_k, p5_r, p6, p6_k, p6_r, ncol = 3, nrow = 4) + # grid.arrange(p5, p5_k, p5_r, p6, p6_k, p6_r, ncol = 3, nrow = 2) # Loop for grid arrange 4x3 - j <- rep(1, ((num_genes_RF) / 3) - 1) + j <- rep(1, ((num_genes_rf) / 3) - 1) for (n in 1:length(j)) { j[n + 1] <- n * 3 + 1 } # Loop for printing each plot num <- 0 - for (m in 1:(round((num_genes_RF) / 3) - 1)) { + for (m in 
1:(round((num_genes_rf) / 3) - 1)) { num <- j[m] - grid.arrange(p_rf_l[[num]], p_rf_K[[num]], p_rf_r[[num]], p_rf_AUC[[num]], - p_rf_l[[num + 1]], p_rf_K[[num + 1]], p_rf_r[[num + 1]], p_rf_AUC[[num + 1]], - p_rf_l[[num + 2]], p_rf_K[[num + 2]], p_rf_r[[num + 2]], p_rf_AUC[[num + 2]], ncol = 4, nrow = 3) - # grid.arrange(p_rf_l[[364]], p_rf_K[[364]], p_rf_r[[364]], - # p_rf_l[[365]], p_rf_K[[365]], p_rf_r[[365]], p_rf_l[[366]], p_rf_K[[366]], p_rf_r[[366]], ncol = 3, nrow = 3) - # p1[[num + 3]], p_rf_K[[num + 3]], p_rf_r[[num + 3]], p1[[num + 4]], p_rf_K[[num + 4]], p_rf_r[[num + 4]] + grid.arrange(p_rf_l[[num]], p_rf_k[[num]], p_rf_r[[num]], p_rf_auc[[num]], + p_rf_l[[num + 1]], p_rf_k[[num + 1]], p_rf_r[[num + 1]], p_rf_auc[[num + 1]], + p_rf_l[[num + 2]], p_rf_k[[num + 2]], p_rf_r[[num + 2]], p_rf_auc[[num + 2]], ncol = 4, nrow = 3) + # grid.arrange(p_rf_l[[364]], p_rf_k[[364]], p_rf_r[[364]], + # p_rf_l[[365]], p_rf_k[[365]], p_rf_r[[365]], p_rf_l[[366]], p_rf_k[[366]], p_rf_r[[366]], ncol = 3, nrow = 3) + # p1[[num + 3]], p_rf_k[[num + 3]], p_rf_r[[num + 3]], p1[[num + 4]], p_rf_k[[num + 4]], p_rf_r[[num + 4]] } - if (num_genes_RF != (num + 2)) { - total_num <- num_genes_RF - (num + 2) + if (num_genes_rf != (num + 2)) { + total_num <- num_genes_rf - (num + 2) if (total_num == 5) { - grid.arrange(p_rf_l[[num + 3]], p_rf_K[[num + 3]], p_rf_r[[num + 3]], p_rf_AUC[[num + 3]], - p_rf_l[[num + 4]], p_rf_K[[num + 4]], p_rf_r[[num + 4]], p_rf_AUC[[num + 4]], - p_rf_l[[num + 5]], p_rf_K[[num + 5]], p_rf_r[[num + 5]], p_rf_AUC[[num + 5]], ncol = 4, nrow = 3 + grid.arrange(p_rf_l[[num + 3]], p_rf_k[[num + 3]], p_rf_r[[num + 3]], p_rf_auc[[num + 3]], + p_rf_l[[num + 4]], p_rf_k[[num + 4]], p_rf_r[[num + 4]], p_rf_auc[[num + 4]], + p_rf_l[[num + 5]], p_rf_k[[num + 5]], p_rf_r[[num + 5]], p_rf_auc[[num + 5]], ncol = 4, nrow = 3 ) - grid.arrange(p_rf_l[[num + 6]], p_rf_K[[num + 6]], p_rf_r[[num + 6]], p_rf_AUC[[num + 6]], - p_rf_l[[num + 7]], p_rf_K[[num + 7]], p_rf_r[[num + 
7]], p_rf_AUC[[num + 7]], - Blank, Blank, Blank, Blank, ncol = 4, nrow = 3 + grid.arrange(p_rf_l[[num + 6]], p_rf_k[[num + 6]], p_rf_r[[num + 6]], p_rf_auc[[num + 6]], + p_rf_l[[num + 7]], p_rf_k[[num + 7]], p_rf_r[[num + 7]], p_rf_auc[[num + 7]], + blank, blank, blank, blank, ncol = 4, nrow = 3 ) } if (total_num == 4) { - grid.arrange(p_rf_l[[num + 3]], p_rf_K[[num + 3]], p_rf_r[[num + 3]], p_rf_AUC[[num + 3]], - p_rf_l[[num + 4]], p_rf_K[[num + 4]], p_rf_r[[num + 4]], p_rf_AUC[[num + 4]], - p_rf_l[[num + 5]], p_rf_K[[num + 5]], p_rf_r[[num + 5]], p_rf_AUC[[num + 5]], ncol = 4, nrow = 3 + grid.arrange(p_rf_l[[num + 3]], p_rf_k[[num + 3]], p_rf_r[[num + 3]], p_rf_auc[[num + 3]], + p_rf_l[[num + 4]], p_rf_k[[num + 4]], p_rf_r[[num + 4]], p_rf_auc[[num + 4]], + p_rf_l[[num + 5]], p_rf_k[[num + 5]], p_rf_r[[num + 5]], p_rf_auc[[num + 5]], ncol = 4, nrow = 3 ) - grid.arrange(p_rf_l[[num + 6]], p_rf_K[[num + 6]], p_rf_r[[num + 6]], p_rf_AUC[[num + 6]], - Blank, Blank, Blank, Blank, Blank, Blank, Blank, Blank, ncol = 4, nrow = 3 + grid.arrange(p_rf_l[[num + 6]], p_rf_k[[num + 6]], p_rf_r[[num + 6]], p_rf_auc[[num + 6]], + blank, blank, blank, blank, blank, blank, blank, blank, ncol = 4, nrow = 3 ) } if (total_num == 3) { - grid.arrange(p_rf_l[[num + 3]], p_rf_K[[num + 3]], p_rf_r[[num + 3]], p_rf_AUC[[num + 3]], - p_rf_l[[num + 4]], p_rf_K[[num + 4]], p_rf_r[[num + 4]], p_rf_AUC[[num + 4]], - p_rf_l[[num + 5]], p_rf_K[[num + 5]], p_rf_r[[num + 5]], p_rf_AUC[[num + 5]], ncol = 4, nrow = 3 + grid.arrange(p_rf_l[[num + 3]], p_rf_k[[num + 3]], p_rf_r[[num + 3]], p_rf_auc[[num + 3]], + p_rf_l[[num + 4]], p_rf_k[[num + 4]], p_rf_r[[num + 4]], p_rf_auc[[num + 4]], + p_rf_l[[num + 5]], p_rf_k[[num + 5]], p_rf_r[[num + 5]], p_rf_auc[[num + 5]], ncol = 4, nrow = 3 ) - # grid.arrange(p_rf_l[[num + 6]], p_rf_K[[num + 6]], p_rf_r[[num + 6]], - # p_rf_l[[num + 7]], p_rf_K[[num + 7]], p_rf_r[[num + 7]], Blank, Blank, Blank, ncol = 3, nrow = 3) + # grid.arrange(p_rf_l[[num + 6]], 
p_rf_k[[num + 6]], p_rf_r[[num + 6]], + # p_rf_l[[num + 7]], p_rf_k[[num + 7]], p_rf_r[[num + 7]], blank, blank, blank, ncol = 3, nrow = 3) } if (total_num == 2) { - grid.arrange(p_rf_l[[num + 3]], p_rf_K[[num + 3]], p_rf_r[[num + 3]], p_rf_AUC[[num + 3]], - p_rf_l[[num + 4]], p_rf_K[[num + 4]], p_rf_r[[num + 4]], p_rf_AUC[[num + 4]], - Blank, Blank, Blank, Blank, ncol = 4, nrow = 3 + grid.arrange(p_rf_l[[num + 3]], p_rf_k[[num + 3]], p_rf_r[[num + 3]], p_rf_auc[[num + 3]], + p_rf_l[[num + 4]], p_rf_k[[num + 4]], p_rf_r[[num + 4]], p_rf_auc[[num + 4]], + blank, blank, blank, blank, ncol = 4, nrow = 3 ) - # grid.arrange(p_rf_l[[num + 6]], p_rf_K[[num + 6]], p_rf_r[[num + 6]], - # p_rf_l[[num + 7]], p_rf_K[[num + 7]], p_rf_r[[num + 7]], Blank, Blank, Blank, ncol = 3, nrow = 3) + # grid.arrange(p_rf_l[[num + 6]], p_rf_k[[num + 6]], p_rf_r[[num + 6]], + # p_rf_l[[num + 7]], p_rf_k[[num + 7]], p_rf_r[[num + 7]], blank, blank, blank, ncol = 3, nrow = 3) } if (total_num == 1) { - grid.arrange(p_rf_l[[num + 3]], p_rf_K[[num + 3]], p_rf_r[[num + 3]], p_rf_AUC[[num + 3]], - Blank, Blank, Blank, Blank, Blank, Blank, Blank, Blank, ncol = 4, nrow = 3 + grid.arrange(p_rf_l[[num + 3]], p_rf_k[[num + 3]], p_rf_r[[num + 3]], p_rf_auc[[num + 3]], + blank, blank, blank, blank, blank, blank, blank, blank, ncol = 4, nrow = 3 ) - # grid.arrange(p_rf_l[[num + 6]], p_rf_K[[num + 6]], p_rf_r[[num + 6]], - # p_rf_l[[num + 7]], p_rf_K[[num + 7]], p_rf_r[[num + 7]], Blank, Blank, Blank, ncol = 3, nrow = 3) + # grid.arrange(p_rf_l[[num + 6]], p_rf_k[[num + 6]], p_rf_r[[num + 6]], + # p_rf_l[[num + 7]], p_rf_k[[num + 7]], p_rf_r[[num + 7]], blank, blank, blank, ncol = 3, nrow = 3) } } dev.off() - # Print rank plots for L and K gene interactions - InteractionScores_AdjustMissing <- InteractionScores - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_L), ]$Avg_Zscore_L <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_K), 
]$Avg_Zscore_K <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_r), ]$Avg_Zscore_r <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_AUC), ]$Avg_Zscore_AUC <- 0.001 + # Print rank plots for L and k gene interactions + interaction_scores_adjust_missing <- interaction_scores + interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$avg_zscore_l), ]$avg_zscore_l <- 0.001 + interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$avg_zscore_k), ]$avg_zscore_k <- 0.001 + interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$avg_zscore_r), ]$avg_zscore_r <- 0.001 + interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$avg_zscore_auc), ]$avg_zscore_auc <- 0.001 - InteractionScores_AdjustMissing$L_Rank <- NA - InteractionScores_AdjustMissing$K_Rank <- NA - InteractionScores_AdjustMissing$r_Rank <- NA - InteractionScores_AdjustMissing$AUC_Rank <- NA + interaction_scores_adjust_missing$l_rank <- NA + interaction_scores_adjust_missing$k_rank <- NA + interaction_scores_adjust_missing$r_rank <- NA + interaction_scores_adjust_missing$auc_rank <- NA - InteractionScores_AdjustMissing$L_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_L) - InteractionScores_AdjustMissing$K_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_K) - InteractionScores_AdjustMissing$r_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_r) - InteractionScores_AdjustMissing$AUC_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_AUC) + interaction_scores_adjust_missing$l_rank <- rank(interaction_scores_adjust_missing$avg_zscore_l) + interaction_scores_adjust_missing$k_rank <- rank(interaction_scores_adjust_missing$avg_zscore_k) + interaction_scores_adjust_missing$r_rank <- rank(interaction_scores_adjust_missing$avg_zscore_r) + interaction_scores_adjust_missing$auc_rank <- rank(interaction_scores_adjust_missing$avg_zscore_auc) - 
InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_L), ]$Z_lm_L <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_K), ]$Z_lm_K <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_r), ]$Z_lm_r <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_AUC), ]$Z_lm_AUC <- 0.001 + interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$z_lm_l), ]$z_lm_l <- 0.001 + interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$z_lm_k), ]$z_lm_k <- 0.001 + interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$z_lm_r), ]$z_lm_r <- 0.001 + interaction_scores_adjust_missing[is.na(interaction_scores_adjust_missing$z_lm_auc), ]$z_lm_auc <- 0.001 - InteractionScores_AdjustMissing$L_Rank_lm <- NA - InteractionScores_AdjustMissing$K_Rank_lm <- NA - InteractionScores_AdjustMissing$r_Rank_lm <- NA - InteractionScores_AdjustMissing$AUC_Rank_lm <- NA + interaction_scores_adjust_missing$l_rank_lm <- NA + interaction_scores_adjust_missing$k_rank_lm <- NA + interaction_scores_adjust_missing$r_rank_lm <- NA + interaction_scores_adjust_missing$auc_rank_lm <- NA - InteractionScores_AdjustMissing$L_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_L) - InteractionScores_AdjustMissing$K_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_K) - InteractionScores_AdjustMissing$r_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_r) - InteractionScores_AdjustMissing$AUC_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_AUC) + interaction_scores_adjust_missing$l_rank_lm <- rank(interaction_scores_adjust_missing$z_lm_l) + interaction_scores_adjust_missing$k_rank_lm <- rank(interaction_scores_adjust_missing$z_lm_k) + interaction_scores_adjust_missing$r_rank_lm <- rank(interaction_scores_adjust_missing$z_lm_r) + interaction_scores_adjust_missing$auc_rank_lm <- rank(interaction_scores_adjust_missing$z_lm_auc) # Rank plots 
- Rank_L_1SD <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank, Avg_Zscore_L)) + + rank_l_1sd <- ggplot(interaction_scores_adjust_missing, aes(l_rank, avg_zscore_l)) + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + - annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Enhancers =", - dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 1, ])[1])) + - annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Suppressors =", - dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -1, ])[1])) + + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Enhancers =", + dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_l >= 1, ])[1])) + + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Suppressors =", + dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_l <= -1, ])[1])) + theme_publication() - Rank_L_2SD <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank, Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + + rank_l_2sd <- ggplot(interaction_scores_adjust_missing, aes(l_rank, avg_zscore_l)) + + ggtitle("Average Z score vs. 
Rank for L above 2sd") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + - annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Enhancers =", - dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 2, ])[1])) + - annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Suppressors =", - dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -2, ])[1])) + + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Enhancers =", + dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_l >= 2, ])[1])) + + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Suppressors =", + dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_l <= -2, ])[1])) + theme_publication() - Rank_L_3SD <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank, Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + + rank_l_3sd <- ggplot(interaction_scores_adjust_missing, aes(l_rank, avg_zscore_l)) + + ggtitle("Average Z score vs. 
Rank for L above 3sd") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + - annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Enhancers =", - dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 3, ])[1])) + - annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Suppressors =", - dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -3, ])[1])) + + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Enhancers =", + dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_l >= 3, ])[1])) + + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Suppressors =", + dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_l <= -3, ])[1])) + theme_publication() - Rank_K_1SD <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank, Avg_Zscore_K)) + + rank_k_1sd <- ggplot(interaction_scores_adjust_missing, aes(k_rank, avg_zscore_k)) + ggtitle("Average Z score vs. 
Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + - annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Enhancers =", - dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -1, ])[1])) + - annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Suppressors =", - dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 1, ])[1])) + + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Enhancers =", + dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_k <= -1, ])[1])) + + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Suppressors =", + dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_k >= 1, ])[1])) + theme_publication() - Rank_K_2SD <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank, Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + + rank_k_2sd <- ggplot(interaction_scores_adjust_missing, aes(k_rank, avg_zscore_k)) + + ggtitle("Average Z score vs. 
Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + - annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Enhancers =", - dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -2, ])[1])) + - annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Suppressors =", - dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 2, ])[1])) + + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Enhancers =", + dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_k <= -2, ])[1])) + + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Suppressors =", + dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_k >= 2, ])[1])) + theme_publication() - Rank_K_3SD <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank, Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + + rank_k_3sd <- ggplot(interaction_scores_adjust_missing, aes(k_rank, avg_zscore_k)) + + ggtitle("Average Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + - annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Enhancers =", - dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -3, ])[1])) + - annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Suppressors =", - dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 3, ])[1])) + + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Enhancers =", + dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_k <= -3, ])[1])) + + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Suppressors =", + dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$avg_zscore_k >= 3, ])[1])) + theme_publication() - Rank_L_1SD_notext <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank, Avg_Zscore_L)) + + rank_l_1sd_notext <- ggplot(interaction_scores_adjust_missing, aes(l_rank, avg_zscore_l)) + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + theme_publication() - Rank_L_2SD_notext <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank, Avg_Zscore_L)) + - ggtitle("Average Z score vs. 
Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + + rank_l_2sd_notext <- ggplot(interaction_scores_adjust_missing, aes(l_rank, avg_zscore_l)) + + ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + theme_publication() - Rank_L_3SD_notext <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank, Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + + rank_l_3sd_notext <- ggplot(interaction_scores_adjust_missing, aes(l_rank, avg_zscore_l)) + + ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + theme_publication() - Rank_K_1SD_notext <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank, Avg_Zscore_K)) + + rank_k_1sd_notext <- ggplot(interaction_scores_adjust_missing, aes(k_rank, avg_zscore_k)) + ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + theme_publication() - Rank_K_2SD_notext <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank, Avg_Zscore_K)) + - ggtitle("Average Z score vs. 
Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + + rank_k_2sd_notext <- ggplot(interaction_scores_adjust_missing, aes(k_rank, avg_zscore_k)) + + ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + theme_publication() - Rank_K_3SD_notext <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank, Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + + rank_k_3sd_notext <- ggplot(interaction_scores_adjust_missing, aes(k_rank, avg_zscore_k)) + + ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + theme_publication() - pdf(file.path(outDir, "RankPlots.pdf"), width = 18, height = 12, onefile = TRUE) + pdf(file.path(out_dir, "rank_plots.pdf"), width = 18, height = 12, onefile = TRUE) grid.arrange( - Rank_L_1SD, - Rank_L_2SD, - Rank_L_3SD, - Rank_K_1SD, - Rank_K_2SD, - Rank_K_3SD, + rank_l_1sd, + rank_l_2sd, + rank_l_3sd, + rank_k_1sd, + rank_k_2sd, + rank_k_3sd, ncol = 3, nrow = 2 ) grid.arrange( - Rank_L_1SD_notext, - Rank_L_2SD_notext, - Rank_L_3SD_notext, - Rank_K_1SD_notext, - Rank_K_2SD_notext, - Rank_K_3SD_notext, + rank_l_1sd_notext, + rank_l_2sd_notext, + rank_l_3sd_notext, + rank_k_1sd_notext, + rank_k_2sd_notext, + rank_k_3sd_notext, ncol = 3, nrow = 2 ) dev.off() - Rank_L_1SD_lm <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank_lm, Z_lm_L)) + + rank_l_1sd_lm <- 
ggplot(interaction_scores_adjust_missing, aes(l_rank_lm, z_lm_l)) + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + - annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Enhancers =", - dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 1, ])[1])) + - annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Suppressors =", - dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -1, ])[1])) + + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Enhancers =", + dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_l >= 1, ])[1])) + + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Suppressors =", + dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_l <= -1, ])[1])) + theme_publication() - Rank_L_2SD_lm <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank_lm, Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + + rank_l_2sd_lm <- ggplot(interaction_scores_adjust_missing, aes(l_rank_lm, z_lm_l)) + + ggtitle("Interaction Z score vs. 
Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + - annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Enhancers =", - dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 2, ])[1])) + - annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Suppressors =", - dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -2, ])[1])) + + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Enhancers =", + dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_l >= 2, ])[1])) + + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Suppressors =", + dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_l <= -2, ])[1])) + theme_publication() - Rank_L_3SD_lm <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank_lm, Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + + rank_l_3sd_lm <- ggplot(interaction_scores_adjust_missing, aes(l_rank_lm, z_lm_l)) + + ggtitle("Interaction Z score vs. 
Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + - annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Enhancers =", - dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 3, ])[1])) + - annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Suppressors =", - dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -3, ])[1])) + + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Enhancers =", + dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_l >= 3, ])[1])) + + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Suppressors =", + dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_l <= -3, ])[1])) + theme_publication() - Rank_K_1SD_lm <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank_lm, Z_lm_K)) + + rank_k_1sd_lm <- ggplot(interaction_scores_adjust_missing, aes(k_rank_lm, z_lm_k)) + ggtitle("Interaction Z score vs. 
Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + - annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Enhancers =", - dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -1, ])[1])) + - annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Suppressors =", - dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 1, ])[1])) + + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Enhancers =", + dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_k <= -1, ])[1])) + + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Suppressors =", + dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_k >= 1, ])[1])) + theme_publication() - Rank_K_2SD_lm <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank_lm, Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + + rank_k_2sd_lm <- ggplot(interaction_scores_adjust_missing, aes(k_rank_lm, z_lm_k)) + + ggtitle("Interaction Z score vs. 
Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + - annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Enhancers =", - dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -2, ])[1])) + - annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Suppressors =", - dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 2, ])[1])) + + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Enhancers =", + dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_k <= -2, ])[1])) + + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Suppressors =", + dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_k >= 2, ])[1])) + theme_publication() - Rank_K_3SD_lm <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank_lm, Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + + rank_k_3sd_lm <- ggplot(interaction_scores_adjust_missing, aes(k_rank_lm, z_lm_k)) + + ggtitle("Interaction Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + - annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = -10, label = paste("Deletion Enhancers =", - dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -3, ])[1])) + - annotate("text", x = (dim(InteractionScores_AdjustMissing)[1] / 2), y = 10, label = paste("Deletion Suppressors =", - dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 3, ])[1])) + + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = -10, label = paste("Deletion Enhancers =", + dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_k <= -3, ])[1])) + + annotate("text", x = (dim(interaction_scores_adjust_missing)[1] / 2), y = 10, label = paste("Deletion Suppressors =", + dim(interaction_scores_adjust_missing[interaction_scores_adjust_missing$z_lm_k >= 3, ])[1])) + theme_publication() - Rank_L_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank_lm, Z_lm_L)) + + rank_l_1sd_notext_lm <- ggplot(interaction_scores_adjust_missing, aes(l_rank_lm, z_lm_l)) + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + theme_publication() - Rank_L_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank_lm, Z_lm_L)) + - ggtitle("Interaction Z score vs. 
Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + + rank_l_2sd_notext_lm <- ggplot(interaction_scores_adjust_missing, aes(l_rank_lm, z_lm_l)) + + ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + theme_publication() - Rank_L_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing, aes(L_Rank_lm, Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + + rank_l_3sd_notext_lm <- ggplot(interaction_scores_adjust_missing, aes(l_rank_lm, z_lm_l)) + + ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + theme_publication() - Rank_K_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank_lm, Z_lm_K)) + + rank_k_1sd_notext_lm <- ggplot(interaction_scores_adjust_missing, aes(k_rank_lm, z_lm_k)) + ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + theme_publication() - Rank_K_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank_lm, Z_lm_K)) + - ggtitle("Interaction Z score vs. 
Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + + rank_k_2sd_notext_lm <- ggplot(interaction_scores_adjust_missing, aes(k_rank_lm, z_lm_k)) + + ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + theme_publication() - Rank_K_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing, aes(K_Rank_lm, Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + + rank_k_3sd_notext_lm <- ggplot(interaction_scores_adjust_missing, aes(k_rank_lm, z_lm_k)) + + ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + theme_publication() - pdf(file.path(outDir, "RankPlots_lm.pdf"), width = 18, height = 12, onefile = TRUE) + pdf(file.path(out_dir, "rank_plots_lm.pdf"), width = 18, height = 12, onefile = TRUE) grid.arrange( - Rank_L_1SD_lm, - Rank_L_2SD_lm, - Rank_L_3SD_lm, - Rank_K_1SD_lm, - Rank_K_2SD_lm, - Rank_K_3SD_lm, + rank_l_1sd_lm, + rank_l_2sd_lm, + rank_l_3sd_lm, + rank_k_1sd_lm, + rank_k_2sd_lm, + rank_k_3sd_lm, ncol = 3, nrow = 2 ) grid.arrange( - Rank_L_1SD_notext_lm, - Rank_L_2SD_notext_lm, - Rank_L_3SD_notext_lm, - Rank_K_1SD_notext_lm, - Rank_K_2SD_notext_lm, - Rank_K_3SD_notext_lm, + rank_l_1sd_notext_lm, + rank_l_2sd_notext_lm, + rank_l_3sd_notext_lm, + rank_k_1sd_notext_lm, + rank_k_2sd_notext_lm, + rank_k_3sd_notext_lm, ncol = 3, nrow = 2 ) dev.off() - X_NArm <- 
InteractionScores[!is.na(InteractionScores$Z_lm_L) | !is.na(InteractionScores$Avg_Zscore_L), ] + df_na_rm <- interaction_scores[!is.na(interaction_scores$z_lm_l) | !is.na(interaction_scores$avg_zscore_l), ] # Find overlaps - X_NArm$Overlap <- "No Effect" - try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L >= 2, ]$Overlap <- "Deletion Enhancer Both") - try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L <= -2, ]$Overlap <- "Deletion Suppressor Both") - try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= 2, ]$Overlap <- "Deletion Enhancer lm only") - try(X_NArm[X_NArm$Z_lm_L <= 2 & X_NArm$Avg_Zscore_L >= 2, ]$Overlap <- "Deletion Enhancer Avg Zscore only") - try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= -2, ]$Overlap <- "Deletion Suppressor lm only") - try(X_NArm[X_NArm$Z_lm_L >= -2 & X_NArm$Avg_Zscore_L <= -2, ]$Overlap <- "Deletion Suppressor Avg Zscore only") - try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= -2, ]$Overlap <- "Deletion Enhancer lm, Deletion Suppressor Avg Z score") - try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= 2, ]$Overlap <- "Deletion Suppressor lm, Deletion Enhancer Avg Z score") + df_na_rm$Overlap <- "No Effect" + try(df_na_rm[which(df_na_rm$z_lm_l >= 2 & df_na_rm$avg_zscore_l >= 2), ]$Overlap <- "Deletion Enhancer Both") + try(df_na_rm[which(df_na_rm$z_lm_l <= -2 & df_na_rm$avg_zscore_l <= -2), ]$Overlap <- "Deletion Suppressor Both") + try(df_na_rm[which(df_na_rm$z_lm_l >= 2 & df_na_rm$avg_zscore_l < 2), ]$Overlap <- "Deletion Enhancer lm only") + try(df_na_rm[which(df_na_rm$z_lm_l < 2 & df_na_rm$avg_zscore_l >= 2), ]$Overlap <- "Deletion Enhancer Avg Zscore only") + try(df_na_rm[which(df_na_rm$z_lm_l <= -2 & df_na_rm$avg_zscore_l > -2), ]$Overlap <- "Deletion Suppressor lm only") + try(df_na_rm[which(df_na_rm$z_lm_l > -2 & df_na_rm$avg_zscore_l <= -2), ]$Overlap <- "Deletion Suppressor Avg Zscore only") + try(df_na_rm[which(df_na_rm$z_lm_l >= 2 & df_na_rm$avg_zscore_l <= -2), ]$Overlap <- "Deletion Enhancer lm, Deletion Suppressor Avg Z score") + 
try(df_na_rm[which(df_na_rm$z_lm_l <= -2 & df_na_rm$avg_zscore_l >= 2), ]$Overlap <- "Deletion Suppressor lm, Deletion Enhancer Avg Z score") # Get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 - get_lm_L <- lm(X_NArm$Z_lm_L ~ X_NArm$Avg_Zscore_L) - L_lm <- summary(get_lm_L) + get_lm_l <- lm(df_na_rm$z_lm_l ~ df_na_rm$avg_zscore_l) + l_lm <- summary(get_lm_l) - get_lm_K <- lm(X_NArm$Z_lm_K ~ X_NArm$Avg_Zscore_K) - K_lm <- summary(get_lm_K) + get_lm_k <- lm(df_na_rm$z_lm_k ~ df_na_rm$avg_zscore_k) + k_lm <- summary(get_lm_k) - get_lm_r <- lm(X_NArm$Z_lm_r ~ X_NArm$Avg_Zscore_r) + get_lm_r <- lm(df_na_rm$z_lm_r ~ df_na_rm$avg_zscore_r) r_lm <- summary(get_lm_r) - get_lm_AUC <- lm(X_NArm$Z_lm_AUC ~ X_NArm$Avg_Zscore_AUC) - AUC_lm <- summary(get_lm_AUC) + get_lm_auc <- lm(df_na_rm$z_lm_auc ~ df_na_rm$avg_zscore_auc) + auc_lm <- summary(get_lm_auc) - pdf(file.path(outDir, "Avg_Zscore_vs_lm_NA_rm.pdf"), width = 16, height = 12, onefile = TRUE) + pdf(file.path(out_dir, "avg_zscore_vs_lm_na_rm.pdf"), width = 16, height = 12, onefile = TRUE) - print(ggplot(X_NArm, aes(Avg_Zscore_L, Z_lm_L)) + + print(ggplot(df_na_rm, aes(avg_zscore_l, z_lm_l)) + geom_point(aes(color = Overlap), shape = 3) + geom_smooth(method = "lm", color = 1) + ggtitle("Avg Zscore vs lm L") + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + - annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm$r.squared, 2))) + theme_publication_legend_right()) + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm$r.squared, 2))) + theme_publication_legend_right()) - print(ggplot(X_NArm, aes(Avg_Zscore_K, Z_lm_K)) + + print(ggplot(df_na_rm, aes(avg_zscore_k, z_lm_k)) + geom_point(aes(color = Overlap), shape = 3) + geom_smooth(method = "lm", color = 1) + ggtitle("Avg Zscore vs lm K") + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 
0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + - annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(K_lm$r.squared, 2))) + theme_publication_legend_right()) + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(k_lm$r.squared, 2))) + theme_publication_legend_right()) - print(ggplot(X_NArm, aes(Avg_Zscore_r, Z_lm_r)) + + print(ggplot(df_na_rm, aes(avg_zscore_r, z_lm_r)) + geom_point(aes(color = Overlap), shape = 3) + geom_smooth(method = "lm", color = 1) + ggtitle("Avg Zscore vs lm r") + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(r_lm$r.squared, 2))) + theme_publication_legend_right()) - print(ggplot(X_NArm, aes(Avg_Zscore_AUC, Z_lm_AUC)) + + print(ggplot(df_na_rm, aes(avg_zscore_auc, z_lm_auc)) + geom_point(aes(color = Overlap), shape = 3) + geom_smooth(method = "lm", color = 1) + ggtitle("Avg Zscore vs lm AUC") + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + - annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(AUC_lm$r.squared, 2))) + theme_publication_legend_right()) + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(auc_lm$r.squared, 2))) + theme_publication_legend_right()) dev.off() - lm_v_Zscore_L <- ggplot(X_NArm, aes(Avg_Zscore_L, Z_lm_L, ORF = OrfRep, Gene = Gene, NG = NG, SM = SM, DB = DB)) + + lm_v_zscore_l <- ggplot(df_na_rm, aes(avg_zscore_l, z_lm_l, ORF = OrfRep, Gene = Gene, NG = NG, SM = SM, DB = DB)) + geom_point(aes(color = Overlap), shape = 3) + geom_smooth(method = "lm", color = 1) + ggtitle("Avg Zscore vs lm L") + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + - annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm$r.squared, 
2))) + theme_publication_legend_right() + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm$r.squared, 2))) + theme_publication_legend_right() - pgg <- ggplotly(lm_v_Zscore_L) - plotly_path <- file.path(outDir, "Avg_Zscore_vs_lm_NA_rm.html") + pgg <- ggplotly(lm_v_zscore_l) + plotly_path <- file.path(out_dir, "avg_zscore_vs_lm_na_rm.html") saveWidget(pgg, file = plotly_path, selfcontained = TRUE) - X_NArm$L_Rank <- rank(X_NArm$Avg_Zscore_L) - X_NArm$K_Rank <- rank(X_NArm$Avg_Zscore_K) - X_NArm$r_Rank <- rank(X_NArm$Avg_Zscore_r) - X_NArm$AUC_Rank <- rank(X_NArm$Avg_Zscore_AUC) - X_NArm$L_Rank_lm <- rank(X_NArm$Z_lm_L) - X_NArm$K_Rank_lm <- rank(X_NArm$Z_lm_K) - X_NArm$r_Rank_lm <- rank(X_NArm$Z_lm_r) - X_NArm$AUC_Rank_lm <- rank(X_NArm$Z_lm_AUC) + df_na_rm$l_rank <- rank(df_na_rm$avg_zscore_l) + df_na_rm$k_rank <- rank(df_na_rm$avg_zscore_k) + df_na_rm$r_rank <- rank(df_na_rm$avg_zscore_r) + df_na_rm$auc_rank <- rank(df_na_rm$avg_zscore_auc) + df_na_rm$l_rank_lm <- rank(df_na_rm$z_lm_l) + df_na_rm$k_rank_lm <- rank(df_na_rm$z_lm_k) + df_na_rm$r_rank_lm <- rank(df_na_rm$z_lm_r) + df_na_rm$auc_rank_lm <- rank(df_na_rm$z_lm_auc) # Get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 - get_lm_L2 <- lm(X_NArm$L_Rank_lm ~ X_NArm$L_Rank) - L_lm2 <- summary(get_lm_L2) - get_lm_K2 <- lm(X_NArm$K_Rank_lm ~ X_NArm$K_Rank) - K_lm2 <- summary(get_lm_K2) - get_lm_r2 <- lm(X_NArm$r_Rank_lm ~ X_NArm$r_Rank) + get_lm_l2 <- lm(df_na_rm$l_rank_lm ~ df_na_rm$l_rank) + l_lm2 <- summary(get_lm_l2) + get_lm_k2 <- lm(df_na_rm$k_rank_lm ~ df_na_rm$k_rank) + k_lm2 <- summary(get_lm_k2) + get_lm_r2 <- lm(df_na_rm$r_rank_lm ~ df_na_rm$r_rank) r_lm2 <- summary(get_lm_r2) - get_lm_AUC2 <- lm(X_NArm$AUC_Rank_lm ~ X_NArm$AUC_Rank) - AUC_lm2 <- summary(get_lm_AUC2) - num_genes_NArm2 <- (dim(X_NArm)[1]) / 2 + get_lm_auc2 <- lm(df_na_rm$auc_rank_lm ~ df_na_rm$auc_rank) + auc_lm2 <- summary(get_lm_auc2) + num_genes_na_rm2 <- (dim(df_na_rm)[1]) 
/ 2 - pdf(file.path(outDir, "Avg_Zscore_vs_lm_ranked_NA_rm.pdf"), width = 16, height = 12, onefile = TRUE) + pdf(file.path(out_dir, "avg_zscore_vs_lm_ranked_na_rm.pdf"), width = 16, height = 12, onefile = TRUE) print( - ggplot(X_NArm, aes(L_Rank, L_Rank_lm)) + + ggplot(df_na_rm, aes(l_rank, l_rank_lm)) + geom_point(aes(color = Overlap), shape = 3) + geom_smooth(method = "lm", color = 1) + ggtitle("Rank Avg Zscore vs lm L") + - annotate("text", x = num_genes_NArm2, y = num_genes_NArm2, label = paste("R-squared = ", round(L_lm2$r.squared, 2))) + + annotate("text", x = num_genes_na_rm2, y = num_genes_na_rm2, label = paste("R-squared = ", round(l_lm2$r.squared, 2))) + theme_publication_legend_right() ) print( - ggplot(X_NArm, aes(K_Rank, K_Rank_lm)) + + ggplot(df_na_rm, aes(k_rank, k_rank_lm)) + geom_point(aes(color = Overlap), shape = 3) + geom_smooth(method = "lm", color = 1) + ggtitle("Rank Avg Zscore vs lm K") + - annotate("text", x = num_genes_NArm2, y = num_genes_NArm2, label = paste("R-squared = ", round(K_lm2$r.squared, 2))) + + annotate("text", x = num_genes_na_rm2, y = num_genes_na_rm2, label = paste("R-squared = ", round(k_lm2$r.squared, 2))) + theme_publication_legend_right() ) print( - ggplot(X_NArm, aes(r_Rank, r_Rank_lm)) + + ggplot(df_na_rm, aes(r_rank, r_rank_lm)) + geom_point(aes(color = Overlap), shape = 3) + geom_smooth(method = "lm", color = 1) + ggtitle("Rank Avg Zscore vs lm r") + - annotate("text", x = num_genes_NArm2, y = num_genes_NArm2, label = paste("R-squared = ", round(r_lm2$r.squared, 2))) + + annotate("text", x = num_genes_na_rm2, y = num_genes_na_rm2, label = paste("R-squared = ", round(r_lm2$r.squared, 2))) + theme_publication_legend_right() ) print( - ggplot(X_NArm, aes(AUC_Rank, AUC_Rank_lm)) + + ggplot(df_na_rm, aes(auc_rank, auc_rank_lm)) + geom_point(aes(color = Overlap), shape = 3) + geom_smooth(method = "lm", color = 1) + ggtitle("Rank of Avg Zscore vs lm AUC") + - annotate("text", x = num_genes_NArm2, y = num_genes_NArm2, label 
= paste("R-squared = ", round(AUC_lm2$r.squared, 2))) + + annotate("text", x = num_genes_na_rm2, y = num_genes_na_rm2, label = paste("R-squared = ", round(auc_lm2$r.squared, 2))) + theme_publication_legend_right() ) dev.off() - Rank_L_1SD <- ggplot(X_NArm, aes(L_Rank, Avg_Zscore_L)) + + rank_l_1sd <- ggplot(df_na_rm, aes(l_rank, avg_zscore_l)) + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + - annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Avg_Zscore_L >= 1, ])[1])) + - annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Avg_Zscore_L <= -1, ])[1])) + + annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$avg_zscore_l >= 1, ])[1])) + + annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$avg_zscore_l <= -1, ])[1])) + theme_publication() - Rank_L_2SD <- ggplot(X_NArm, aes(L_Rank, Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + + rank_l_2sd <- ggplot(df_na_rm, aes(l_rank, avg_zscore_l)) + + ggtitle("Average Z score vs. 
Rank for L above 2sd") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + - annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Avg_Zscore_L >= 2, ])[1])) + - annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Avg_Zscore_L <= -2, ])[1])) + + annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$avg_zscore_l >= 2, ])[1])) + + annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$avg_zscore_l <= -2, ])[1])) + theme_publication() - Rank_L_3SD <- ggplot(X_NArm, aes(L_Rank, Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + + rank_l_3sd <- ggplot(df_na_rm, aes(l_rank, avg_zscore_l)) + + ggtitle("Average Z score vs. 
Rank for L above 3sd") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + - annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Avg_Zscore_L >= 3, ])[1])) + - annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Avg_Zscore_L <= -3, ])[1])) + + annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$avg_zscore_l >= 3, ])[1])) + + annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$avg_zscore_l <= -3, ])[1])) + theme_publication() - Rank_K_1SD <- ggplot(X_NArm, aes(K_Rank, Avg_Zscore_K)) + + rank_k_1sd <- ggplot(df_na_rm, aes(k_rank, avg_zscore_k)) + ggtitle("Average Z score vs. 
Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + - annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Avg_Zscore_K <= -1, ])[1])) + - annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Avg_Zscore_K >= 1, ])[1])) + + annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$avg_zscore_k <= -1, ])[1])) + + annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$avg_zscore_k >= 1, ])[1])) + theme_publication() - Rank_K_2SD <- ggplot(X_NArm, aes(K_Rank, Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + + rank_k_2sd <- ggplot(df_na_rm, aes(k_rank, avg_zscore_k)) + + ggtitle("Average Z score vs. 
Rank for K above 2sd") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + - annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Avg_Zscore_K <= -2, ])[1])) + - annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Avg_Zscore_K >= 2, ])[1])) + + annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$avg_zscore_k <= -2, ])[1])) + + annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$avg_zscore_k >= 2, ])[1])) + theme_publication() - Rank_K_3SD <- ggplot(X_NArm, aes(K_Rank, Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + + rank_k_3sd <- ggplot(df_na_rm, aes(k_rank, avg_zscore_k)) + + ggtitle("Average Z score vs. 
Rank for K above 3sd") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + - annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Avg_Zscore_K <= -3, ])[1])) + - annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Avg_Zscore_K >= 3, ])[1])) + + annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$avg_zscore_k <= -3, ])[1])) + + annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$avg_zscore_k >= 3, ])[1])) + theme_publication() - Rank_L_1SD_notext <- ggplot(X_NArm, aes(L_Rank, Avg_Zscore_L)) + + rank_l_1sd_notext <- ggplot(df_na_rm, aes(l_rank, avg_zscore_l)) + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + theme_publication() - Rank_L_2SD_notext <- ggplot(X_NArm, aes(L_Rank, Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + + rank_l_2sd_notext <- ggplot(df_na_rm, aes(l_rank, avg_zscore_l)) + + ggtitle("Average Z score vs. 
Rank for L above 2sd") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + theme_publication() - Rank_L_3SD_notext <- ggplot(X_NArm, aes(L_Rank, Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + + rank_l_3sd_notext <- ggplot(df_na_rm, aes(l_rank, avg_zscore_l)) + + ggtitle("Average Z score vs. Rank for L above 3sd") + xlab("Rank") + ylab("Avg Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + theme_publication() - Rank_K_1SD_notext <- ggplot(X_NArm, aes(K_Rank, Avg_Zscore_K)) + + rank_k_1sd_notext <- ggplot(df_na_rm, aes(k_rank, avg_zscore_k)) + ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + theme_publication() - Rank_K_2SD_notext <- ggplot(X_NArm, aes(K_Rank, Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + + rank_k_2sd_notext <- ggplot(df_na_rm, aes(k_rank, avg_zscore_k)) + + ggtitle("Average Z score vs. 
Rank for K above 2sd") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + theme_publication() - Rank_K_3SD_notext <- ggplot(X_NArm, aes(K_Rank, Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + + rank_k_3sd_notext <- ggplot(df_na_rm, aes(k_rank, avg_zscore_k)) + + ggtitle("Average Z score vs. Rank for K above 3sd") + xlab("Rank") + ylab("Avg Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + theme_publication() - pdf(file.path(outDir, "RankPlots_naRM.pdf"), width = 18, height = 12, onefile = TRUE) + pdf(file.path(out_dir, "rank_plots_narm.pdf"), width = 18, height = 12, onefile = TRUE) grid.arrange( - Rank_L_1SD, - Rank_L_2SD, - Rank_L_3SD, - Rank_K_1SD, - Rank_K_2SD, - Rank_K_3SD, + rank_l_1sd, + rank_l_2sd, + rank_l_3sd, + rank_k_1sd, + rank_k_2sd, + rank_k_3sd, ncol = 3, nrow = 2 ) grid.arrange( - Rank_L_1SD_notext, - Rank_L_2SD_notext, - Rank_L_3SD_notext, - Rank_K_1SD_notext, - Rank_K_2SD_notext, - Rank_K_3SD_notext, + rank_l_1sd_notext, + rank_l_2sd_notext, + rank_l_3sd_notext, + rank_k_1sd_notext, + rank_k_2sd_notext, + rank_k_3sd_notext, ncol = 3, nrow = 2 ) dev.off() - Rank_L_1SD_lm <- ggplot(X_NArm, aes(L_Rank_lm, Z_lm_L)) + + rank_l_1sd_lm <- ggplot(df_na_rm, aes(l_rank_lm, z_lm_l)) + ggtitle("Interaction Z score vs. 
Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + - annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Z_lm_L >= 1, ])[1])) + - annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Z_lm_L <= -1, ])[1])) + + annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$z_lm_l >= 1, ])[1])) + + annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$z_lm_l <= -1, ])[1])) + theme_publication() - Rank_L_2SD_lm <- ggplot(X_NArm, aes(L_Rank_lm, Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + + rank_l_2sd_lm <- ggplot(df_na_rm, aes(l_rank_lm, z_lm_l)) + + ggtitle("Interaction Z score vs. 
Rank for L above 2sd") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + - annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Z_lm_L >= 2, ])[1])) + - annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Z_lm_L <= -2, ])[1])) + + annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$z_lm_l >= 2, ])[1])) + + annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$z_lm_l <= -2, ])[1])) + theme_publication() - Rank_L_3SD_lm <- ggplot(X_NArm, aes(L_Rank_lm, Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + + rank_l_3sd_lm <- ggplot(df_na_rm, aes(l_rank_lm, z_lm_l)) + + ggtitle("Interaction Z score vs. 
Rank for L above 3sd") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + - annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Z_lm_L >= 3, ])[1])) + - annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Z_lm_L <= -3, ])[1])) + + annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$z_lm_l >= 3, ])[1])) + + annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$z_lm_l <= -3, ])[1])) + theme_publication() - Rank_K_1SD_lm <- ggplot(X_NArm, aes(K_Rank_lm, Z_lm_K)) + + rank_k_1sd_lm <- ggplot(df_na_rm, aes(k_rank_lm, z_lm_k)) + ggtitle("Interaction Z score vs. 
Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + - annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Z_lm_K <= -1, ])[1])) + - annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Z_lm_K >= 1, ])[1])) + + annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$z_lm_k <= -1, ])[1])) + + annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$z_lm_k >= 1, ])[1])) + theme_publication() - Rank_K_2SD_lm <- ggplot(X_NArm, aes(K_Rank_lm, Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + + rank_k_2sd_lm <- ggplot(df_na_rm, aes(k_rank_lm, z_lm_k)) + + ggtitle("Interaction Z score vs. 
Rank for K above 2sd") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + - annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Z_lm_K <= -2, ])[1])) + - annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Z_lm_K >= 2, ])[1])) + + annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$z_lm_k <= -2, ])[1])) + + annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$z_lm_k >= 2, ])[1])) + theme_publication() - Rank_K_3SD_lm <- ggplot(X_NArm, aes(K_Rank_lm, Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + + rank_k_3sd_lm <- ggplot(df_na_rm, aes(k_rank_lm, z_lm_k)) + + ggtitle("Interaction Z score vs. 
Rank for K above 3sd") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + - annotate("text", x = (dim(X_NArm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(X_NArm[X_NArm$Z_lm_K <= -3, ])[1])) + - annotate("text", x = (dim(X_NArm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(X_NArm[X_NArm$Z_lm_K >= 3, ])[1])) + + annotate("text", x = (dim(df_na_rm)[1] / 2), y = -10, label = paste("Deletion Enhancers =", dim(df_na_rm[df_na_rm$z_lm_k <= -3, ])[1])) + + annotate("text", x = (dim(df_na_rm)[1] / 2), y = 10, label = paste("Deletion Suppressors =", dim(df_na_rm[df_na_rm$z_lm_k >= 3, ])[1])) + theme_publication() - Rank_L_1SD_notext_lm <- ggplot(X_NArm, aes(L_Rank_lm, Z_lm_L)) + + rank_l_1sd_notext_lm <- ggplot(df_na_rm, aes(l_rank_lm, z_lm_l)) + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + theme_publication() - Rank_L_2SD_notext_lm <- ggplot(X_NArm, aes(L_Rank_lm, Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + + rank_l_2sd_notext_lm <- ggplot(df_na_rm, aes(l_rank_lm, z_lm_l)) + + ggtitle("Interaction Z score vs. 
Rank for L above 2sd") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + theme_publication() - Rank_L_3SD_notext_lm <- ggplot(X_NArm, aes(L_Rank_lm, Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + + rank_l_3sd_notext_lm <- ggplot(df_na_rm, aes(l_rank_lm, z_lm_l)) + + ggtitle("Interaction Z score vs. Rank for L above 3sd") + xlab("Rank") + ylab("Int Z score L") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + theme_publication() - Rank_K_1SD_notext_lm <- ggplot(X_NArm, aes(K_Rank_lm, Z_lm_K)) + + rank_k_1sd_notext_lm <- ggplot(df_na_rm, aes(k_rank_lm, z_lm_k)) + ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (1), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-1), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-1, 1)) + geom_point(size = 0.1, shape = 3) + theme_publication() - Rank_K_2SD_notext_lm <- ggplot(X_NArm, aes(K_Rank_lm, Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + + rank_k_2sd_notext_lm <- ggplot(df_na_rm, aes(k_rank_lm, z_lm_k)) + + ggtitle("Interaction Z score vs. 
Rank for K above 2sd") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (2), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-2), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-2, 2)) + geom_point(size = 0.1, shape = 3) + theme_publication() - Rank_K_3SD_notext_lm <- ggplot(X_NArm, aes(K_Rank_lm, Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + + rank_k_3sd_notext_lm <- ggplot(df_na_rm, aes(k_rank_lm, z_lm_k)) + + ggtitle("Interaction Z score vs. Rank for K above 3sd") + xlab("Rank") + ylab("Int Z score K") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (3), ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = (-3), ymax = -Inf, fill = "orange", alpha = 0.3) + geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) + theme_publication() - pdf(file.path(outDir, "RankPlots_lm_naRM.pdf"), width = 18, height = 12, onefile = TRUE) + pdf(file.path(out_dir, "rank_plots_lm_narm.pdf"), width = 18, height = 12, onefile = TRUE) grid.arrange( - Rank_L_1SD_lm, - Rank_L_2SD_lm, - Rank_L_3SD_lm, - Rank_K_1SD_lm, - Rank_K_2SD_lm, - Rank_K_3SD_lm, + rank_l_1sd_lm, + rank_l_2sd_lm, + rank_l_3sd_lm, + rank_k_1sd_lm, + rank_k_2sd_lm, + rank_k_3sd_lm, ncol = 3, nrow = 2 ) grid.arrange( - Rank_L_1SD_notext_lm, - Rank_L_2SD_notext_lm, - Rank_L_3SD_notext_lm, - Rank_K_1SD_notext_lm, - Rank_K_2SD_notext_lm, - Rank_K_3SD_notext_lm, + rank_l_1sd_notext_lm, + rank_l_2sd_notext_lm, + rank_l_3sd_notext_lm, + rank_k_1sd_notext_lm, + rank_k_2sd_notext_lm, + rank_k_3sd_notext_lm, ncol = 3, nrow = 2 ) @@ -3264,28 +3225,28 @@ for (s in Background_Strains) { } # Get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 -get_lm_1 <- lm(X_NArm$Z_lm_K ~ X_NArm$Z_lm_L) -L_lm_1 <- summary(get_lm_1) -get_lm_2 <- lm(X_NArm$Z_lm_r ~ X_NArm$Z_lm_L) 
-L_lm_2 <- summary(get_lm_2) -get_lm_3 <- lm(X_NArm$Z_lm_AUC ~ X_NArm$Z_lm_L) -L_lm_3 <- summary(get_lm_3) -get_lm_4 <- lm(X_NArm$Z_lm_r ~ X_NArm$Z_lm_K) -L_lm_4 <- summary(get_lm_4) -get_lm_5 <- lm(X_NArm$Z_lm_AUC ~ X_NArm$Z_lm_K) -L_lm_5 <- summary(get_lm_5) -get_lm_6 <- lm(X_NArm$Z_lm_AUC ~ X_NArm$Z_lm_r) -L_lm_6 <- summary(get_lm_6) +get_lm_1 <- lm(df_na_rm$z_lm_k ~ df_na_rm$z_lm_l) +l_lm_1 <- summary(get_lm_1) +get_lm_2 <- lm(df_na_rm$z_lm_r ~ df_na_rm$z_lm_l) +l_lm_2 <- summary(get_lm_2) +get_lm_3 <- lm(df_na_rm$z_lm_auc ~ df_na_rm$z_lm_l) +l_lm_3 <- summary(get_lm_3) +get_lm_4 <- lm(df_na_rm$z_lm_r ~ df_na_rm$z_lm_k) +l_lm_4 <- summary(get_lm_4) +get_lm_5 <- lm(df_na_rm$z_lm_auc ~ df_na_rm$z_lm_k) +l_lm_5 <- summary(get_lm_5) +get_lm_6 <- lm(df_na_rm$z_lm_auc ~ df_na_rm$z_lm_r) +l_lm_6 <- summary(get_lm_6) -pdf(file.path(outDir, "Correlation_CPPs.pdf"), width = 10, height = 7, onefile = TRUE) +pdf(file.path(out_dir, "correlation_cpps.pdf"), width = 10, height = 7, onefile = TRUE) -ggplot(X_NArm, aes(Z_lm_L, Z_lm_K)) + +ggplot(df_na_rm, aes(z_lm_l, z_lm_k)) + geom_point(shape = 3, color = "gray70") + geom_smooth(method = "lm", color = "tomato3") + ggtitle("Interaction L vs. Interaction K") + xlab("z-score L") + ylab("z-score K") + - annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_1$r.squared, 3))) + + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_1$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), @@ -3296,13 +3257,13 @@ ggplot(X_NArm, aes(Z_lm_L, Z_lm_K)) + axis.title.y = element_text(size = 18) ) -ggplot(X_NArm, aes(Z_lm_L, Z_lm_r)) + +ggplot(df_na_rm, aes(z_lm_l, z_lm_r)) + geom_point(shape = 3, color = "gray70") + geom_smooth(method = "lm", color = "tomato3") + ggtitle("Interaction L vs. 
Interaction r") + xlab("z-score L") + ylab("z-score r") + - annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_2$r.squared, 3))) + + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_2$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), @@ -3313,13 +3274,13 @@ ggplot(X_NArm, aes(Z_lm_L, Z_lm_r)) + axis.title.y = element_text(size = 18) ) -ggplot(X_NArm, aes(Z_lm_L, Z_lm_AUC)) + +ggplot(df_na_rm, aes(z_lm_l, z_lm_auc)) + geom_point(shape = 3, color = "gray70") + geom_smooth(method = "lm", color = "tomato3") + ggtitle("Interaction L vs. Interaction AUC") + xlab("z-score L") + ylab("z-score AUC") + -annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_3$r.squared, 3))) + +annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_3$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), @@ -3330,13 +3291,13 @@ theme( axis.title.y = element_text(size = 18) ) -ggplot(X_NArm, aes(Z_lm_K, Z_lm_r)) + +ggplot(df_na_rm, aes(z_lm_k, z_lm_r)) + geom_point(shape = 3, color = "gray70") + geom_smooth(method = "lm", color = "tomato3") + ggtitle("Interaction K vs. Interaction r") + xlab("z-score K") + ylab("z-score r") + - annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_4$r.squared, 3))) + + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_4$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), @@ -3347,13 +3308,13 @@ ggplot(X_NArm, aes(Z_lm_K, Z_lm_r)) + axis.title.y = element_text(size = 18) ) -ggplot(X_NArm, aes(Z_lm_K, Z_lm_AUC)) + +ggplot(df_na_rm, aes(z_lm_k, z_lm_auc)) + geom_point(shape = 3, color = "gray70") + geom_smooth(method = "lm", color = "tomato3") + ggtitle("Interaction K vs. 
Interaction AUC") + xlab("z-score K") + ylab("z-score AUC") + - annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_5$r.squared, 3))) + + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_5$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), @@ -3364,13 +3325,13 @@ ggplot(X_NArm, aes(Z_lm_K, Z_lm_AUC)) + axis.title.y = element_text(size = 18) ) -ggplot(X_NArm, aes(Z_lm_r, Z_lm_AUC)) + +ggplot(df_na_rm, aes(z_lm_r, z_lm_auc)) + geom_point(shape = 3, color = "gray70") + geom_smooth(method = "lm", color = "tomato3") + ggtitle("Interaction r vs. Interaction AUC") + xlab("z-score r") + ylab("z-score AUC") + - annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_6$r.squared, 3))) + + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_6$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), @@ -3380,15 +3341,15 @@ ggplot(X_NArm, aes(Z_lm_r, Z_lm_AUC)) + axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)) -InteractionScores_RF2 <- InteractionScores_RF[!is.na(InteractionScores_RF$Z_lm_L), ] +interaction_scores_rf2 <- interaction_scores_rf[!is.na(interaction_scores_rf$z_lm_l), ] -ggplot(X_NArm, aes(Z_lm_L, Z_lm_K)) + +ggplot(df_na_rm, aes(z_lm_l, z_lm_k)) + geom_point(shape = 3, color = "gray70") + - geom_point(data = InteractionScores_RF2, aes(Z_lm_L, Z_lm_K), color = "cyan") + + geom_point(data = interaction_scores_rf2, aes(z_lm_l, z_lm_k), color = "cyan") + ggtitle("Interaction L vs. 
Interaction K") + xlab("z-score L") + ylab("z-score K") + - annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_1$r.squared, 3))) + + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_1$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), @@ -3399,13 +3360,13 @@ ggplot(X_NArm, aes(Z_lm_L, Z_lm_K)) + axis.title.y = element_text(size = 18) ) -ggplot(X_NArm, aes(Z_lm_L, Z_lm_r)) + +ggplot(df_na_rm, aes(z_lm_l, z_lm_r)) + geom_point(shape = 3, color = "gray70") + - geom_point(data = InteractionScores_RF2, aes(Z_lm_L, Z_lm_r), color = "cyan") + + geom_point(data = interaction_scores_rf2, aes(z_lm_l, z_lm_r), color = "cyan") + ggtitle("Interaction L vs. Interaction r") + xlab("z-score L") + ylab("z-score r") + - annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_2$r.squared, 3))) + + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_2$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), @@ -3416,13 +3377,13 @@ ggplot(X_NArm, aes(Z_lm_L, Z_lm_r)) + axis.title.y = element_text(size = 18) ) -ggplot(X_NArm, aes(Z_lm_L, Z_lm_AUC)) + +ggplot(df_na_rm, aes(z_lm_l, z_lm_auc)) + geom_point(shape = 3, color = "gray70") + - geom_point(data = InteractionScores_RF2, aes(Z_lm_L, Z_lm_AUC), color = "cyan") + + geom_point(data = interaction_scores_rf2, aes(z_lm_l, z_lm_auc), color = "cyan") + ggtitle("Interaction L vs. 
Interaction AUC") + xlab("z-score L") + ylab("z-score AUC") + - annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_3$r.squared, 3))) + + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_3$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), @@ -3433,13 +3394,13 @@ ggplot(X_NArm, aes(Z_lm_L, Z_lm_AUC)) + axis.title.y = element_text(size = 18) ) -ggplot(X_NArm, aes(Z_lm_K, Z_lm_r)) + +ggplot(df_na_rm, aes(z_lm_k, z_lm_r)) + geom_point(shape = 3, color = "gray70") + - geom_point(data = InteractionScores_RF2, aes(Z_lm_K, Z_lm_r), color = "cyan") + + geom_point(data = interaction_scores_rf2, aes(z_lm_k, z_lm_r), color = "cyan") + ggtitle("Interaction K vs. Interaction r") + xlab("z-score K") + ylab("z-score r") + - annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_4$r.squared, 3))) + + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_4$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), @@ -3450,13 +3411,13 @@ ggplot(X_NArm, aes(Z_lm_K, Z_lm_r)) + axis.title.y = element_text(size = 18) ) -ggplot(X_NArm, aes(Z_lm_K, Z_lm_AUC)) + +ggplot(df_na_rm, aes(z_lm_k, z_lm_auc)) + geom_point(shape = 3, color = "gray70") + - geom_point(data = InteractionScores_RF2, aes(Z_lm_K, Z_lm_AUC), color = "cyan") + + geom_point(data = interaction_scores_rf2, aes(z_lm_k, z_lm_auc), color = "cyan") + ggtitle("Interaction K vs. 
Interaction AUC") + xlab("z-score K") + ylab("z-score AUC") + - annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_5$r.squared, 3))) + + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_5$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), @@ -3467,13 +3428,13 @@ ggplot(X_NArm, aes(Z_lm_K, Z_lm_AUC)) + axis.title.y = element_text(size = 18) ) -ggplot(X_NArm, aes(Z_lm_r, Z_lm_AUC)) + +ggplot(df_na_rm, aes(z_lm_r, z_lm_auc)) + geom_point(shape = 3, color = "gray70") + - geom_point(data = InteractionScores_RF2, aes(Z_lm_r, Z_lm_AUC), color = "cyan") + + geom_point(data = interaction_scores_rf2, aes(z_lm_r, z_lm_auc), color = "cyan") + ggtitle("Interaction r vs. Interaction AUC") + xlab("z-score r") + ylab("z-score AUC") + - annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm_6$r.squared, 3))) + + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(l_lm_6$r.squared, 3))) + theme_publication_legend_right() + theme( panel.grid.major = element_blank(), diff --git a/workflow/apps/r/joinInteractExps.R b/workflow/apps/r/joinInteractExps.R index 3b685364..f01f40d1 100644 --- a/workflow/apps/r/joinInteractExps.R +++ b/workflow/apps/r/joinInteractExps.R @@ -1,44 +1,45 @@ #!/usr/bin/env Rscript # JoinInteractExps.R -library(plyr) -library(sos) -library(dplyr) +library("plyr") +library("sos") +library("dplyr") args <- commandArgs(TRUE) # Set output dir if (length(args) >= 1) { - outDir <- file.path(args[1]) + out_dir <- file.path(args[1]) } else { - outDir <- "./" # for legacy workflow + out_dir <- "./" # for legacy workflow } # Set sd value if (length(args) >= 2) { - sd <- args[2] + sd <- as.numeric(args[2]) } else { sd <- 2 # default value } -print(paste("SD=", sd)) -# Set studyInfo file +sprintf("SD value is: %f", sd) + +# Set study_info file if (length(args) >= 3) { - studyInfo <- file.path(args[3]) + study_info <- file.path(args[3]) } else { - studyInfo 
<- "../Code/StudyInfo.csv" # for legacy workflow + study_info <- "../Code/StudyInfo.csv" # for legacy workflow } studies <- args[3:length(args)] -inputFiles <- c() +input_files <- c() for (study in 1:length(studies)) { - zsFile <- file.path(study, "zscores", "zscores_interaction.csv") - if (file.exists(zsFile)) { - inputFiles[study] <- zsFile + zs_file <- file.path(study, "zscores", "zscores_interaction.csv") + if (file.exists(zs_file)) { + input_files[study] <- zs_file } } -print(length(inputFiles)) +print(length(input_files)) # TODO this is better handled in a loop in case you want to compare more experiments? # The input is already designed for this @@ -46,38 +47,38 @@ print(length(inputFiles)) # Join the two files at a time as a function of how many inputFile # list the larger file first ? in this example X2 has the larger number of genes # If X1 has a larger number of genes, switch the order of X1 and X2 -if (length(inputFiles) == 2) { - X1 <- read.csv(file = inputFiles[1], stringsAsFactors = FALSE) - X2 <- read.csv(file = inputFiles[2], stringsAsFactors = FALSE) +if (length(input_files) == 2) { + X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE) + X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE) X <- join(X1, X2, by = "OrfRep") OBH <- X[, order(colnames(X))] # OrderByHeader headSel <- select(OBH, contains("OrfRep"), matches("Gene"), contains("Z_lm_K"), contains("Z_Shift_K"), contains("Z_lm_L"), contains("Z_Shift_L")) - headSel <- select(headSel, -"Gene.1") #remove "Gene.1 column + headSel <- select(headSel, -"Gene.1") # remove "Gene.1 column headSel2 <- select(OBH, contains("OrfRep"), matches("Gene")) #Frame for interleaving Z_lm with Shift colums - headSel2 <- select(headSel2, -"Gene.1") #remove "Gene.1 column #Frame for interleaving Z_lm with Shift colums -} else if (length(inputFiles) == 3) { - X1 <- read.csv(file = inputFiles[1], stringsAsFactors = FALSE) #exp1File,stringsAsFactors = FALSE) - X2 <- read.csv(file = 
inputFiles[2], stringsAsFactors = FALSE) #exp2File,stringsAsFactors = FALSE) - X3 <- read.csv(file = inputFiles[3], stringsAsFactors = FALSE) #exp3File,stringsAsFactors = FALSE) + headSel2 <- select(headSel2, -"Gene.1") # remove "Gene.1 column #Frame for interleaving Z_lm with Shift colums +} else if (length(input_files) == 3) { + X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE) # exp1File,stringsAsFactors = FALSE) + X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE) # exp2File,stringsAsFactors = FALSE) + X3 <- read.csv(file = input_files[3], stringsAsFactors = FALSE) # exp3File,stringsAsFactors = FALSE) X <- join(X1, X2, by = "OrfRep") X <- join(X, X3, by = "OrfRep") - OBH <- X[, order(colnames(X))] #OrderByHeader + OBH <- X[, order(colnames(X))] # OrderByHeader headSel <- select(OBH, contains("OrfRep"), matches("Gene"), contains("Z_lm_K"), contains("Z_Shift_K"), contains("Z_lm_L"), contains("Z_Shift_L")) headSel <- select(headSel, -"Gene.1", -"Gene.2") headSel2 <- select(OBH, contains("OrfRep"), matches("Gene")) headSel2 <- select(headSel2, -"Gene.1", -"Gene.2") -} else if (length(inputFiles) == 4) { - X1 <- read.csv(file = inputFiles[1], stringsAsFactors = FALSE) #exp1File,stringsAsFactors = FALSE) - X2 <- read.csv(file = inputFiles[2], stringsAsFactors = FALSE) #exp2File,stringsAsFactors = FALSE) - X3 <- read.csv(file = inputFiles[3], stringsAsFactors = FALSE) #exp3File,stringsAsFactors = FALSE) - X4 <- read.csv(file = inputFiles[4], stringsAsFactors = FALSE) #exp4File,stringsAsFactors = FALSE) +} else if (length(input_files) == 4) { + X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE) # exp1File,stringsAsFactors = FALSE) + X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE) # exp2File,stringsAsFactors = FALSE) + X3 <- read.csv(file = input_files[3], stringsAsFactors = FALSE) # exp3File,stringsAsFactors = FALSE) + X4 <- read.csv(file = input_files[4], stringsAsFactors = FALSE) # exp4File,stringsAsFactors = 
FALSE) X <- join(X1, X2, by = "OrfRep") X <- join(X, X3, by = "OrfRep") X <- join(X, X4, by = "OrfRep") - OBH <- X[, order(colnames(X))] #OrderByHeader + OBH <- X[, order(colnames(X))] # OrderByHeader headSel <- select(OBH, contains("OrfRep"), matches("Gene"), contains("Z_lm_K"), contains("Z_Shift_K"), contains("Z_lm_L"), contains("Z_Shift_L")) headSel <- select(headSel, -"Gene.1", -"Gene.2", -"Gene.3") @@ -221,13 +222,13 @@ if (std == 0) { # R places hidden "" around the header names. The following # is intended to remove those quote so that the "" do not blow up the Java REMc. # Use ,quote=F in the write.csv statement to fix R output file. -# write.csv(combI,file.path(outDir,"CombinedKLzscores.csv"), row.names = FALSE) -write.csv(REMcRdy, file.path(outDir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE) -write.csv(shiftOnly, file.path(outDir, "Shift_only.csv"), row.names = FALSE, quote = FALSE) +# write.csv(combI,file.path(out_dir,"CombinedKLzscores.csv"), row.names = FALSE) +write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE) +write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE) #LabelStd <- read.table(file="./parameters.csv",stringsAsFactors = FALSE,sep = ",") -LabelStd <- read.csv(file = studyInfo, stringsAsFactors = FALSE) +LabelStd <- read.csv(file = study_info, stringsAsFactors = FALSE) print(std) LabelStd[, 4] <- as.numeric(std) -write.csv(LabelStd, file = file.path(outDir, "parameters.csv"), row.names = FALSE) -write.csv(LabelStd, file = studyInfo, row.names = FALSE) +write.csv(LabelStd, file = file.path(out_dir, "parameters.csv"), row.names = FALSE) +write.csv(LabelStd, file = study_info, row.names = FALSE) diff --git a/workflow/qhtcp-workflow b/workflow/qhtcp-workflow index bb80c188..7ec2eb03 100755 --- a/workflow/qhtcp-workflow +++ b/workflow/qhtcp-workflow @@ -155,7 +155,7 @@ parse_input() { debug "Running: ${FUNCNAME[0]} $*" 
long_opts="project:,module:,wrapper:,nomodule:,markdown,yes,auto,debug,help" - short_opts="+p:m:s:n:ydh" + short_opts="+p:m:w:n:ydh" if input=$(getopt -o $short_opts -l $long_opts -- "$@"); then eval set -- "$input" @@ -177,7 +177,7 @@ parse_input() { ;; --nomodule|-n) shift - EXCLUDE_MODULES+=("$1") + IFS=',' read -ra EXCLUDE_MODULES <<< "$1" ;; --markdown) generate_markdown; exit 0 # TODO disable the exit after development @@ -189,7 +189,8 @@ parse_input() { declare -g DEBUG=1 ;; --help|-h) - print_help; exit 0 + print_help + exit 0 ;; --) shift @@ -320,7 +321,21 @@ random_three_words() { printf "%s_" "${arr[@]}" | sed 's/_$//' } +# @description More concise debugging +# @arg $1 array command(s) to run +# @exitcode 0 command successful +# @exitcode 1 command not successful +# @internal +execute() { + if debug "$*"; then + "$@" + else + "$@" &>/dev/null + fi +} + # @description Backup one or more files to an incremented .bk file +# # @exitcode backup iterator max 255 # @internal backup() { @@ -339,6 +354,7 @@ backup() { } # @description Prints a helpful message add program start +# # @internal interactive_header() { debug "Running: ${FUNCNAME[0]}" @@ -366,62 +382,6 @@ interactive_header() { underline=$(tput smul) nounderline=$(tput rmul) - # Gather and list projects from SCANS_DIR - shopt -s nullglob - projects=("$SCANS_DIR"/*/) - shopt -u nullglob - - if [[ ${#projects[@]} -eq 0 ]]; then - echo "No projects found in $SCANS_DIR" - ask_pn && PROJECTS+=("${ADD_PROJECTS[@]}") - else - echo "${underline}Projects${nounderline}" - projects=("${projects[@]%/}") # strip comma first! 
- projects=("${projects[@]##*/}") - - # Determine if we need two columns - if [[ ${#projects[@]} -gt 8 ]]; then - # Calculate the number of elements in each column - num_columns=$(( (${#projects[@]} + 1) / 2 )) - - # Determine the maximum width of the first column - max_width=0 - for ((i=0; i for all + * A comma-separated list of module numbers: 2,5,12 + * 0 for none (wrappers only) + EOF + ((YES)) || read -r -p "(all): " response + echo "" + if [[ -z $response ]]; then + MODULES=("${ALL_MODULES[@]}") + elif [[ $response -eq 0 ]]; then + EXCLUDE_MODULES=("${ALL_MODULES[@]}") + else + IFS=',' read -ra arr <<< "$response" + for i in "${arr[@]}"; do + if [[ $i =~ ^[0-9]+$ ]]; then + MODULES+=("${ALL_MODULES[$((i-1))]}") + else + err "Module number $i is invalid, skipping" + fi + done + fi + unset response arr i + fi + + # If we're just installing dependencies, skip the rest + [[ ${MODULES[*]} == "install_dependencies" ]] && return 0 + + # Submodule selection + if [[ ${#MODULES[@]} -eq 0 && ${#EXCLUDE_MODULES[@]} -eq 0 && ${#WRAPPERS[@]} -eq 0 ]]; then + while :; do + cat <<-EOF + ${underline}Enter wrapper(s) to run followed by its arguments in a comma-separated string${nounderline} + * for none + * A comma-separated list of wrappers and their arguments + * Both arguments are required + * Quote the argument string if it contains whitespace + * Example: ${ALL_WRAPPERS[0]},\"arg1,arg2,arg3...\",${ALL_WRAPPERS[1]},\"arg1,arg2,arg3...\" + EOF + ((YES)) || read -r -p "(none): " response + echo "" + [[ -z $response ]] && break + IFS=',' read -ra arr <<< "$response" + if [[ $((${#arr[@]} % 2)) -eq 0 ]]; then # check if array is even + WRAPPERS+=("${arr[@]}") + else + err "The second argument is required (may be an empty string, \"\")" + fi + unset response arr i + done + fi + echo "" + + # Project selection if [[ ${#PROJECTS[@]} -eq 0 ]]; then num=${#projects[@]} if [[ $num -eq 0 ]]; then @@ -549,56 +620,6 @@ interactive_header() { ask_pn && unset "PROJECTS[i]" && 
PROJECTS+=("${ADD_PROJECTS[@]}") fi done - - if [[ ${#MODULES[@]} -eq 0 && ${#EXCLUDE_MODULES[@]} -eq 0 ]]; then - cat <<-EOF - ${underline}Enter modules(s) to run${nounderline} - * for all - * A comma-separated list of module numbers: 2,5,12 - * 0 for none - EOF - ((YES)) || read -r -p "(all): " response - echo "" - if [[ -z $response ]]; then - MODULES=("${ALL_MODULES[@]}") - elif [[ $response -eq 0 ]]; then - EXCLUDE_MODULES=("${ALL_MODULES[@]}") - else - IFS=',' read -ra arr <<< "$response" - for i in "${arr[@]}"; do - if [[ $i =~ ^[0-9]+$ ]]; then - MODULES+=("${ALL_MODULES[$((i-1))]}") - else - err "Module number $i is invalid, skipping" - fi - done - fi - unset response arr i - fi - - if [[ ${#MODULES[@]} -eq 0 && ${#EXCLUDE_MODULES[@]} -eq 0 && ${#WRAPPERS[@]} -eq 0 ]]; then - while :; do - cat <<-EOF - ${underline}Enter wrapper(s) to run followed by its arguments in a comma-separated string${nounderline} - * for none - * A comma-separated list of wrappers and their arguments - * Both arguments are required - * Quote the argument string if it contains whitespace - * Example: ${ALL_WRAPPERS[0]},\"arg1,arg2,arg3...\",${ALL_WRAPPERS[1]},\"arg1,arg2,arg3...\" - EOF - ((YES)) || read -r -p "(none): " response - echo "" - [[ -z $response ]] && break - IFS=',' read -ra arr <<< "$response" - if [[ $((${#arr[@]} % 2)) -eq 0 ]]; then # check if array is even - WRAPPERS+=("${arr[@]}") - else - err "The second argument is required (may be an empty string, \"\")" - fi - unset response arr i - done - fi - echo "" } @@ -644,17 +665,24 @@ install_dependencies() { debug "Running: ${FUNCNAME[0]} $*" # Dependency arrays - depends_rpm=(graphviz pandoc pdftk-java gd-devel perl-CPAN shdoc nano - rsync coreutils libcurl-devel openssl-devel harfbuzz-devel fribidi-devel - R-core R-core-devel) - depends_deb=(graphviz pandoc pdftk-java libgd-dev perl shdoc nano rsync - coreutils libcurl-dev openssl-dev libharfbuzz-dev libfribidi-dev r-base r-base-dev) - depends_brew=(graphiz pandoc 
gd pdftk-java shdoc nano perl rsync coreutils harfbuzz fribidi r) - depends_perl=(Test::Warnings Test::Fatal File::Map Sub::Uplevel ExtUtils::Config + depends_rpm=( + graphviz pandoc pdftk-java gd-devel perl-CPAN shdoc nano + rsync coreutils libcurl-devel openssl-devel harfbuzz-devel + fribidi-devel R-core R-core-devel java) + depends_deb=( + graphviz pandoc pdftk-java libgd-dev perl shdoc nano rsync + coreutils libcurl-dev openssl-dev libharfbuzz-dev libfribidi-dev + r-base r-base-dev default-jre) + depends_brew=( + graphiz pandoc gd pdftk-java shdoc nano perl rsync coreutils + harfbuzz fribidi r java) + depends_perl=( + Test::Warnings Test::Fatal File::Map Sub::Uplevel ExtUtils::Config ExtUtils::PkgConfig IPC::Run Module::Build::Tiny GD GO::TermFinder) - depends_r=(BiocManager ontologyIndex ggrepel tidyverse sos openxlsx ggplot2 - plyr extrafont gridExtra gplots stringr plotly ggthemes pandoc rmarkdown - plotly htmlwidgets gplots gdata) + depends_r=( + BiocManager ontologyIndex ggrepel tidyverse sos openxlsx ggplot2 + plyr extrafont gridExtra gplots stringr plotly ggthemes pandoc + rmarkdown plotly htmlwidgets gplots gdata) depends_bioc=(UCSC.utils org.Sc.sgd.db) [[ $1 == "--get-depends" ]] && return 0 # if we just want to read the depends vars @@ -747,7 +775,7 @@ module init_project # # * Initializes a project directory in the scans directory # -# TODO +# **TODO** # # * Copy over source image directories from robot # * MasterPlate_ file **should not be an xlsx file**, no portability @@ -755,7 +783,7 @@ module init_project # * But moving forward should switch to csv or something open # * Do we need to sync a QHTCP template? 
# -# NOTES +# **NOTES** # # * Copy over the images from the robot and then DO NOT TOUCH that directory except to copy from it # * Write-protect (read-only) if we need to @@ -767,7 +795,6 @@ module init_project init_project() { debug "Running: ${FUNCNAME[0]}" - # Create a project scans dir [[ -d $PROJECT_SCANS_DIR ]] || (mkdir -p "$PROJECT_SCANS_DIR" || return 1) @@ -823,127 +850,20 @@ init_project() { rsync --archive "$QHTCP_TEMPLATE_DIR"/ "$QHTCP_RESULTS_DIR" fi - # Create the study info file - # Use initials from project or whoami? - # Best I can do is first two letters of username - # See TODO in markdown - initials="${USER:0:2}" - INITIALS=${initials^^} - - empty_study=1 - # Find an Exp directory that does not exist - while [[ -d $QHTCP_RESULTS_DIR/Exp$empty_study ]]; do - (( empty_study++ )) - done - - next_study_entry="$empty_study,$PROJECT_SUFFIX,NA,NA,$INITIALS" - - echo "${underline}Study Info File${nounderline}" - - if [[ -f $STUDY_INFO_FILE ]]; then - # Get latest entry - get_studies "$STUDY_INFO_FILE" - largest=${STUDIES_NUMS[0]} - for i in "${STUDIES_NUMS[@]}"; do - if ((i > largest)); then - largest=$i - fi - done - empty_study=$((largest+1)) - next_study_entry="$((empty_study)),$PROJECT_SUFFIX,NA,NA,$INITIALS" - else # create a default StudyInfo.csv - echo "ExpNumb,ExpLabel,BackgroundSD,ZscoreJoinSD,AnalysisBy" > "$STUDY_INFO_FILE" - echo "$next_study_entry" >> "$STUDY_INFO_FILE" - next_study_entry="$((empty_study+1)),$PROJECT_SUFFIX,NA,NA,$INITIALS" - fi - - # Print current studies - cat <<-EOF - * Give each experiment labels to be used for the plots and specific files. - * Enter the desired experiment names in the order they should appear in the REMc heatmaps - - Current study info file contents: - - ${underline}$STUDY_INFO_FILE${nounderline} - $(cat "$STUDY_INFO_FILE") - - EOF - - # Allow user to add/edit the study info file - if ! 
((YES)); then - for ((i=1; i<2; i++)); do - cat <<-EOF - Next entry suggestion: "$next_study_entry" - - Would you like to: - * (a)dd the suggested entry - * (e)dit the StudyInfo.csv file manually - * (c)ontinue (default) - EOF - read -r -p "(c): " response - echo "" - [[ -z $response ]] && break - case $response in - a) - echo "Adding auto-entry suggestion to $STUDY_INFO_FILE" - echo "$next_study_entry" >> "$STUDY_INFO_FILE" - next_study_entry="$((empty_study+1)),$PROJECT_SUFFIX,NA,NA,$INITIALS" - i=0 - ;; - e) - debug "${EDITOR:-nano} $STUDY_INFO_FILE" - ${EDITOR:-nano} "$STUDY_INFO_FILE" - ;; - c) - break - ;; - *) - err "Invalid response, please try again" - i=0 - ;; - esac - break - done - fi - - get_studies "$STUDY_INFO_FILE" - - # Initialize missing Exp dirs - STUDIES_DIRS=() - for s in "${STUDIES_NUMS[@]}"; do - study_dir="$QHTCP_RESULTS_DIR/Exp$s" - STUDIES_DIRS+=("$study_dir") - [[ -d $study_dir ]] || mkdir "$study_dir" - # # We don't need a template anymore? - # if ! rsync --archive "$STUDY_TEMPLATE_DIR" "$STUDY_DIR"; then - # err "Could not copy $STUDY_TEMPLATE_DIR template to $STUDY_DIR" - # continue - # fi - # fi - done - + study_info # Write skeleton files in csv # If we have to convert to xlsx later, so be it - echo "In the future, create the DrugMedia file here" + echo "TODO: in the future, offer to create the DrugMedia file here" # cat <<-EOF > "$DRUG_MEDIA_FILE" - - # EOF - - # TODO here we'll copy scans from robot but for now let's pause and wait for transfer - echo "In the future, create the MasterPlate file here" + echo "TODO: in the future, offer to create the MasterPlate file here" # cat <<-EOF > "$MASTER_PLATE_FILE" - - # EOF - - - } @@ -1297,21 +1217,20 @@ easy() { # Backup and create EASY results dirs [[ -d $EASY_RESULTS_DIR ]] && backup "$EASY_RESULTS_DIR" - [[ -d $EASY_RESULTS_DIR ]] || mkdir -p "$EASY_RESULTS_DIR" + [[ -d $EASY_RESULTS_DIR ]] || execute mkdir "$EASY_RESULTS_DIR" # Make EASY dirs dirs=('PrintResults' 'CFfigs' 'Fotos') 
for d in "${dirs[@]}"; do if [[ ! -d $EASY_RESULTS_DIR/$d ]]; then - debug "mkdir $EASY_RESULTS_DIR/$d" - mkdir "$EASY_RESULTS_DIR/$d" + execute mkdir "$EASY_RESULTS_DIR/$d" fi done # Copy Templates declare -gx DRUG_MEDIA_FILE="$SCANS_DIR/DrugMedia_$PROJECT_NAME.xls" declare -gx MASTER_PLATE_FILE="$EASY_RESULTS_DIR/MasterPlate_$PROJECT_NAME.xls" - rsync -a "$EASY_DIR"/{figs,PTmats} "$EASY_RESULTS_DIR" + execute rsync -a "$EASY_DIR"/{figs,PTmats} "$EASY_RESULTS_DIR" # Ask the user to launch EASYconsole.m in MATLAB # MATLAB doesn't support passing args to scripts se we have to use ENV VARS instead @@ -1350,12 +1269,11 @@ module qhtcp # @description System for Multi-QHTCP-Experiment Gene Interaction Profiling Analysis # # * Functional rewrite of REMcMaster3.sh, RemcMaster2.sh, REMcJar2.sh, ExpFrontend.m, mProcess.sh, mFunction.sh, mComponent.sh -# * Added a newline character to the end of StudyInfo.csv so it is a valid text file +# * Added a newline character to the end of the study info file so it is a valid text file # # TODO # # * Suggest renaming StudiesQHTCP to something like qhtcp qhtcp_output or output -# * Store StudyInfo somewhere better # * Move (hide) the study template somewhere else # * StudiesArchive should be smarter: # * Create a database with as much information as possible @@ -1417,8 +1335,8 @@ qhtcp() { [[ -d $QHTCP_RESULTS_DIR ]] || err "$QHTCP_RESULTS_DIR does not exist, have you run the init_project module?" 
- # Sets STUDIES_NUMS - get_studies "$STUDY_INFO_FILE" + # Sets STUDIES_NUMS and STUDIES_DIRS + study_info choose_easy_results "$EASY_OUT_DIR" @@ -1441,21 +1359,15 @@ qhtcp() { # Run R interactions script on all studies for s in "${STUDIES_NUMS[@]}"; do - STUDY_DIR="$QHTCP_RESULTS_DIR/Exp$s" - r_interactions \ - "$EASY_RESULTS_DIR/results_std.txt" \ - "$STUDY_DIR/zscores" \ - "$STUDY_INFO_FILE" \ - "$APPS_DIR/r/SGD_features.tab" \ - 3 \ - "$s" - done - - read -r -p "Press enter to continue" response - - # Run remc as part of the QHTCP process - # pass all the study directories to it so the scripts have all the paths - remc "$STUDY_INFO_FILE" "${STUDIES_DIRS[@]}" + [[ -d $QHTCP_RESULTS_DIR/Exp$s/zscores ]] || + execute mkdir "$QHTCP_RESULTS_DIR/Exp$s/zscores" + [[ -d $QHTCP_RESULTS_DIR/Exp$s/zscores/qc ]] || + execute mkdir "$QHTCP_RESULTS_DIR/Exp$s/zscores/qc" + r_interactions "$s" + done \ + && remc \ + && gtf \ + && gta } @@ -1464,42 +1376,30 @@ module remc # # TODO # -# * Which components can be parallelized? -# @arg $1 string studyInfo file +# * Which components can be parallelized? 
+# * Move Exp directory discovery from scripts to this module +# +# +# @arg $1 string study info file remc() { - debug "Running: ${FUNCNAME[0]} $*" + debug "Running: ${FUNCNAME[0]}" + + # Sets STUDIES_NUMS and STUDIES_DIRS + study_info # If any wrappers fail the rest will not run, this is fundamental to module design # Remove leading && to run regardless - # TODO can this be r_join_interactions \ - "$QHTCP_RESULTS_DIR" # output directory - 2 \ % sd value - "$1" # studyInfo file - "${@:2}" \ + "${STUDIES_DIRS[@]}" \ && java_extract \ - "$APPS_DIR/java/GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab" \ - "$APPS_DIR/java/ORF_List_Without_DAmPs.txt" \ - "$QHTCP_RESULTS_DIR/REMcRdy_lm_only.csv" \ - "$QHTCP_RESULTS_DIR" \ - "$QHTCP_RESULTS_DIR/REMcRdy_lm_only.csv-finalTable.csv" \ && r_add_shift_values \ - "$QHTCP_RESULTS_DIR/REMcRdy_lm_only.csv-finalTable.csv" \ - "$QHTCP_RESULTS_DIR/Shift_only.csv" \ - "$1" \ - "$QHTCP_RESULTS_DIR/REMcWithShift.csv" \ && r_create_heat_maps \ - "$QHTCP_RESULTS_DIR/REMcWithShift.csv" \ - "$QHTCP_RESULTS_DIR" \ - && r_heat_maps_homology \ - "$QHTCP_RESULTS_DIR/REMcRdy_lm_only.csv-finalTable.csv" \ - "$APPS_DIR/r/170503_DAmPs_Only.txt" \ - "$APPS_DIR/r/Yeast_Human_Homology_Mapping_biomaRt_18_0920.csv" \ - "$QHTCP_RESULTS_DIR/homology" + && r_heat_maps_homology } module gtf +# shellcheck disable=SC2120 # @description GTF module for QHTCP # @arg $1 string output directory # @arg $2 string gene_association.sgd @@ -1507,23 +1407,18 @@ module gtf # @arg $4 string ORF_List_Without_DAmPs.txt gtf() { debug "Running: ${FUNCNAME[0]}" - gtf_out_dir="${1:-$QHTCP_RESULTS_DIR/gtf}" - gene_association_sgd="${2:-"$APPS_DIR/r/gene_association.sgd"}" - gene_ontology_obo="${3:-"$APPS_DIR/r/gene_ontology_edit.obo"}" - orf_list="${4:-"$APPS_DIR/r/ORF_List_Without_DAmPs.txt"}" - process_dir="$gtf_out_dir/process" - function_dir="$gtf_out_dir/function" - component_dir="$gtf_out_dir/component" + process_dir="$GTF_OUT_DIR/process" + 
function_dir="$GTF_OUT_DIR/function" + component_dir="$GTF_OUT_DIR/component" py_gtf_dcon \ "$process_dir" \ - "$gtf_out_dir" + "$GTF_OUT_DIR" # Reproduce the function and components dirs from the process dir for d in "$function_dir" "$component_dir"; do - debug "rsync -a $process_dir/ $d/" - rsync -a "$process_dir/" "$d/" + execute rsync -a "$process_dir/" "$d/" done for d in "$process_dir" "$function_dir" "$component_dir"; do @@ -1532,25 +1427,17 @@ gtf() { txts=("$d"/*.txt) # glob all txt files from each dir shopt -u nullglob for txt in "${txts[@]}"; do - debug "pl_gtf_analyze -an $gene_association_sgd -as P -o $gene_ontology_obo -b $orf_list $txt" - pl_gtf_analyze \ - '-an' "$gene_association_sgd" \ - '-as' 'P' \ - '-o' "$gene_ontology_obo" \ - '-b' "$orf_list" \ - "$txt" - debug "pl_terms2tsv $txt" + pl_gtf_analyze "$txt" pl_gtf_terms2tsv "$txt" done - debug "py_gtf_concat $gtf_out_dir $out_file" - py_gtf_concat "$gtf_out_dir" "$out_file" + py_gtf_concat "$GTF_OUT_DIR" "$out_file" done - - r_compile_gtf "$gtf_out_dir" + r_compile_gtf "$GTF_OUT_DIR" } module gta +# shellcheck disable=SC2120 # @description GTA module for QHTCP # # TODO @@ -1563,41 +1450,41 @@ module gta # @arg $3 string gene_ontology_edit.obo # @arg $4 string go_terms.tab # @arg $5 string All_SGD_GOTerms_for_QHTCPtk.csv -# @arg $6 string zscores_interaction.csv gta() { debug "Running: ${FUNCNAME[0]}" - gta_out_dir="${1:-"$QHTCP_RESULTS_DIR/gta"}" gene_association_sgd="${2:-"$APPS_DIR/r/gene_association.sgd"}" gene_ontology_obo="${3:-"$APPS_DIR/r/gene_ontology_edit.obo"}" sgd_terms_tfile="${4:-"$APPS_DIR/r/go_terms.tab"}" - all_sgd_terms_csv="${5:-"$APPS_DIR/r/All_SGD_GOTerms_for_QHTCPtk.csv"}" - zscores_file="${6:-"$gta_out_dir/zscores/zscores_interaction.csv"}" # TODO This could be wrong, it could be in main results + all_sgd_terms_csv="${5:-"$APPS_DIR/r/All_SGD_GOTerms_for_QHTCPtk.csv"}" + + # TODO This could be wrong, it could be in main results - # Sets STUDIES_NUMS - get_studies 
"$STUDY_INFO_FILE" + # Sets STUDIES_NUMS and STUDIES_DIRS + study_info - [[ -d $gta_out_dir ]] || mkdir "$gta_out_dir" + [[ -d $GTA_OUT_DIR ]] && backup "$GTA_OUT_DIR" + execute mkdir "$GTA_OUT_DIR" # Loop over the array and create pairwise arrays for ((i=0; i<${#STUDIES_NUMS[@]}; i++)); do - for ((j=i+1; j<${#STUDIES_NUMS[@]}; j++)); do - pair=("${STUDIES_NUMS[i]}" "${STUDIES_NUMS[j]}") - echo "${pair[@]}" - done + for ((j=i+1; j<${#STUDIES_NUMS[@]}; j++)); do + pair=("${STUDIES_NUMS[i]}" "${STUDIES_NUMS[j]}") + echo "${pair[@]}" + done done # Create unique parwise combinations of study nums from dir names study_combos=() for ((i=0; i<${#STUDIES_NUMS[@]}; i++)); do - # Loop through the array again - for ((j=0; j<${#STUDIES_NUMS[@]}; j++)); do - # If the indices are not the same - if [ "$i" != "$j" ]; then - # Print the unique combination - study_combos+=("${STUDIES_NUMS[$i]},${STUDIES_NUMS[$j]}") - fi - done + # Loop through the array again + for ((j=0; j<${#STUDIES_NUMS[@]}; j++)); do + # If the indices are not the same + if [ "$i" != "$j" ]; then + # Print the unique combination + study_combos+=("${STUDIES_NUMS[$i]},${STUDIES_NUMS[$j]}") + fi + done done # The following are three types of studies @@ -1606,13 +1493,8 @@ gta() { for s in "${STUDIES_NUMS[@]}"; do zscores_file="$QHTCP_RESULTS_DIR/Exp$s/$zscores_file" if [[ -f $zscores_file ]]; then - mkdir "$gta_out_dir/Exp$s" - r_gta \ - "Exp$s" \ - "$zscores_file" \ - "$sgd_terms_tfile" \ - "$gene_association_sgd" \ - "$gta_out_dir" + mkdir "$GTA_OUT_DIR/Exp$s" + r_gta "Exp$s" "$zscores_file" fi done @@ -1620,23 +1502,16 @@ gta() { for combo in "${study_combos[@]}"; do # Split on comma and assign to array IFS=',' read -ra studies <<< "$combo" - r_gta_pairwiselk \ - "${studies[0]}" \ - "${studies[1]}" \ - "$STUDY_INFO_FILE" \ - "Average_GOTerms_All.csv" \ - "$gta_out_dir" + r_gta_pairwiselk "${studies[0]}" "${studies[1]}" done # All studies - # If you have an unknown # of studies it must be passed last and any 
preceding arguments - # are required + # All preceding arguments are required so we can pass multiple studies r_gta_heatmaps \ "$STUDY_INFO_FILE" \ "$gene_ontology_obo" \ "$sgd_terms_tfile" \ "$all_sgd_terms_csv" \ - "$zscores_file" \ "$QHTCP_RESULTS_DIR" \ "$QHTCP_RESULTS_DIR/TermSpecificHeatmaps" \ "${STUDIES_NUMS[@]}" @@ -1678,8 +1553,8 @@ wrapper r_gta # * Average_GOTerms_All.csv # # -# @arg $1 string Exp# name -# @arg $2 string ZScores_Interaction.csv file +# @arg $1 string Exp# name (required) +# @arg $2 string zscores_interaction.csv (required) # @arg $3 string go_terms.tab file # @arg $4 string [gene_association.sgd](https://downloads.yeastgenome.org/curation/chromosomal_feature/gene_association.sgd) # @arg $5 string output directory @@ -1690,15 +1565,18 @@ r_gta() { EOF script="$APPS_DIR/r/gtaTemplate.R" - [[ -d $5 ]] || mkdir -p "$5" - debug "$RSCRIPT $script $*" - "$RSCRIPT" "$script" \ + + out_file="${5:-"$GTA_OUT_DIR"}/Average_GOTerms_All.csv" + + execute "$RSCRIPT" "$script" \ "$1" \ "$2" \ - "$3" \ - "$4" \ - "$5" \ - "${@:6}" + "${3:-"$APPS_DIR/r/go_terms.tab"}" \ + "${4:-"$APPS_DIR/r/gene_association.sgd"}" \ + "${5:-"$GTA_OUT_DIR"}" \ + "${@:6}" # future arguments + + [[ -f $out_file ]] || (echo "$out_file does not exist"; return 1) } @@ -1726,9 +1604,9 @@ wrapper r_gta_pairwiselk # * The code uses the naming convention of PairwiseCompare_Exp’#’-Exp’#’ to standardize and keep simple the structural naming (where ‘X’ is either K or L and ‘Y’ is the number of the experiment GTA results to be found in ../GTAresult/Exp_). 
# * {FYI There are also individual scripts that just do the ‘L’ or ‘K’ pairwise studies in the ../Code folder.} # -# @arg $1 string First Exp# name -# @arg $2 string Second Exp# name -# @arg $3 string StudyInfo.csv file +# @arg $1 string First Exp# name (required) +# @arg $2 string Second Exp# name (required) +# @arg $3 string study info file # @arg $4 string output directory # r_gta_pairwiselk() { @@ -1741,13 +1619,12 @@ r_gta_pairwiselk() { [[ -d $4 ]] || mkdir -p "$4" - debug "$RSCRIPT $script $*" - "$RSCRIPT" "$script" \ + execute "$RSCRIPT" "$script" \ "$1" \ "$2" \ - "$3" \ - "$4" \ - "${@:5}" + "${3:-$STUDY_INFO_FILE}" \ + "${4:-"$GTA_OUT_DIR"}" \ + "${@:5}" # future arguments } @@ -1756,9 +1633,10 @@ wrapper r_gta_heatmaps # # TODO # -# * Script could use rename -# * Script should be refactored to automatically allow more studies -# * Script should be refactored with more looping to reduce verbosity +# * Rename +# * Refactor to automatically allow more studies +# * Refactor with more looping to reduce verbosity +# * Reduce cyclomatic complexity of some of the for loops # # Files # @@ -1772,17 +1650,16 @@ wrapper r_gta_heatmaps # This wrapper: # # * The Term Specific Heatmaps are produced directly from the ../ExpStudy/Exp_/ZScores/ZScores_Interaction.csv file generated by the user modified interaction… .R script. -# * The heatmap labeling is per the names the user wrote into the StudyInfo.txt spreadsheet. +# * The heatmap labeling is per the names the user wrote into the study info file # * Verify that the All_SGD_GOTerms_for_QHTCPtk.csv found in ../Code is what you wish to use or if you wish to use a custom modified version. # * If you wish to use a custom modified version, create it and modify the TSHeatmaps template script (TSHeatmaps5dev2.R) and save it as a ‘TSH_study specific name’. 
# -# @arg $1 string StudyInfo.csv file -# @arg $2 string gene_ontology_edit.obo file -# @arg $3 string go_terms.tab file +# @arg $1 string study info file +# @arg $2 string gene_ontology_edit.obo +# @arg $3 string go_terms.tab # @arg $4 string All_SGD_GOTerms_for_QHTCPtk.csv -# @arg $5 string ZScores_interaction.csv -# @arg $6 string base directory -# @arg $7 string output directory +# @arg $5 string base directory +# @arg $6 string output directory # r_gta_heatmaps() { debug "Running: ${FUNCNAME[0]} $*" @@ -1792,8 +1669,15 @@ r_gta_heatmaps() { script="$APPS_DIR/r/TSHeatmaps5dev2.R" [[ -d $7 ]] || mkdir -p "$7" - debug "$RSCRIPT $script $*" - "$RSCRIPT" "$script" "$@" + + execute "$RSCRIPT" "$script" \ + "${1:-$STUDY_INFO_FILE}" \ + "${2:-"$APPS_DIR/r/gene_ontology_edit.obo"}" \ + "${3:-"$APPS_DIR/r/go_terms.tab"}" \ + "${4:-"$APPS_DIR/r/All_SGD_GOTerms_for_QHTCPtk.csv"}" \ + "${5:-"$QHTCP_RESULTS_DIR"}" \ + "${6:-"$QHTCP_RESULTS_DIR/TermSpecificHeatmaps"}" \ + "${@:7}" # studies } @@ -1810,19 +1694,27 @@ wrapper r_interactions # * Re-enable disabled linter checks # * Reduce cyclomatic complexity of some of the for loops # * There needs to be one point of truth for the SD factor -# * Replace most paste() functions with printf() +# * Replace most paste() functions with sprintf() +# +# INPUT +# +# * easy/results_std.txt +# +# OUTPUT +# +# * zscores/zscores_interaction.csv +# * etc. 
# # NOTES # # * # -# @arg $1 string The input directory -# @arg $2 string The zscores directory -# @arg $3 string The study info file +# @arg $1 integer Exp number (required) +# @arg $2 integer delta SD background value (default: 3) +# @arg $3 string study info file # @arg $4 string SGD_features.tab -# @arg $5 integer delta SD background value (default: 5) -# @arg $6 integer experiment number - +# @arg $5 string easy/results_std.txt +# @arg $6 string zscores directory r_interactions() { debug "Running: ${FUNCNAME[0]} $*" cat <<-EOF @@ -1835,20 +1727,18 @@ r_interactions() { * This is most often "trial and error", meaning there is a 'Frequency_Delta_Background.pdf' report in the /Exp_/ZScores/QC/ folder to evaluate whether the chosen value was suitable (and if not the analysis can simply be rerun with a more optimal choice). * In general, err on the high side, with BSD of 10 or 12…. One can also use EZview to examine the raw images and individual cultures potentially included/excluded as a consequence of the selected value. * Background values are reported in the results sheet and so could also be analyzed there. 
- EOF script="$APPS_DIR/r/interactions.R" - debug "$RSCRIPT $script $*" - "$RSCRIPT" "$script" \ + execute "$RSCRIPT" "$script" \ "$1" \ - "$2" \ - "$3" \ + "${2:-3}" \ + "${3:-"$STUDY_INFO_FILE"}" \ "${4:-"$APPS_DIR/r/SGD_features.tab"}" \ - "${5:-3}" \ - "${6}" \ - "${@:7}" # optional arguments + "${5:-"$EASY_RESULTS_DIR/results_std.txt"}" \ + "${6:-"$QHTCP_RESULTS_DIR/Exp$1/zscores"}" \ + "${@:7}" # future arguments } @@ -1856,27 +1746,33 @@ wrapper r_join_interactions # @description JoinInteractExps3dev.R creates REMcRdy_lm_only.csv and Shift_only.csv # # TODO +# # * Needs more loops to reduce verbosity # +# INPUT # -# Output +# * study info file +# * Exp#/zscores/zscores_interaction.csv +# +# OUTPUT # # * REMcRdy_lm_only.csv # * Shift_only.csv # * parameters.csv # -# @arg $1 string The output directory -# @arg $2 string The sd value -# @arg $3 string The studyInfo file +# @arg $1 string output directory +# @arg $2 string sd value (default: 2) +# @arg $3 string study info file r_join_interactions() { debug "Running: ${FUNCNAME[0]} $*" script="$APPS_DIR/r/joinInteractExps.R" - debug "$RSCRIPT $script $*" - "$RSCRIPT" "$script" \ - "$1" \ - "$2" \ - "$3" \ - "${@:4}" # optional arguments + + execute "$RSCRIPT" "$script" \ + "${1:-$QHTCP_RESULTS_DIR}" \ + "${2:-2}" \ + "${3:-$STUDY_INFO_FILE}" \ + "${@:4:-${STUDIES_DIRS[@]}}" + local out_files=("$1/REMcRdy_lm_only.csv" "$1/Shift_only.csv" "$1/parameters.csv") for f in "${out_files[@]}"; do [[ -f $f ]] || (echo "$f does not exist"; return 1) @@ -1885,77 +1781,89 @@ r_join_interactions() { wrapper java_extract +# shellcheck disable=SC2120 # @description Jingyu's REMc java utility # -# Input -# -# * REMcRdy_lm_only.csv -# -# Output -# -# * REMcRdy_lm_only.csv-finalTable.csv -# -# NOTE +# TODO # # * Closed-source w/ hardcoded output directory, so have to pushd/popd to run (not ideal) # -# @arg $1 string GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab -# @arg $2 string ORF_List_Without_DAmPs.txt -# @arg $3 string 
REMcRdy_lm_only.csv -# @arg $4 string The output directory -# @arg $5 string The output file +# INPUT +# +# * REMcRdy_lm_only.csv +# +# OUTPUT +# +# * REMcRdy_lm_only.csv-finalTable.csv +# +# @arg $1 string output directory +# @arg $2 string REMcRdy_lm_only.csv +# @arg $3 string GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab +# @arg $4 string ORF_List_Without_DAmPs.txt # @exitcode 0 if expected output file exists # @exitcode 1 if expected output file does not exist - java_extract() { debug "Running: ${FUNCNAME[0]}" classpath="$APPS_DIR/java/javaExtract.jar" - - # backup previous output - if ! backup "${5:-"$QHTCP_RESULTS_DIR/REMcRdy_lm_only.csv-finalTable.csv"}"; then - ask "Backup of ${5:-"$QHTCP_RESULTS_DIR/REMcRdy_lm_only.csv-finalTable.csv"} failed, continue?" || return 1 - fi + output_file="${1:-$QHTCP_RESULTS_DIR}/REMcRdy_lm_only.csv-finalTable.csv" + + [[ -f $output_file ]] && backup "$output_file" + java_cmd=( - "$JAVA" -Xms512m -Xmx2048m -Dfile.encoding=UTF-8 -classpath "$classpath" ExecMain - "${3:-"$QHTCP_RESULTS_DIR/REMcRdy_lm_only.csv"}" - "${1:-"$APPS_DIR/java/GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab"}" - "${2:-"$APPS_DIR/java/ORF_List_Without_DAmPs.txt"}" + "$JAVA" -Xms512m -Xmx2048m -Dfile.encoding=UTF-8 + -classpath "$classpath" ExecMain + "${2:-"$QHTCP_RESULTS_DIR/REMcRdy_lm_only.csv"}" + "${3:-"$APPS_DIR/java/GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab"}" + "${4:-"$APPS_DIR/java/ORF_List_Without_DAmPs.txt"}" ) debug "pushd && ${java_cmd[*]} && popd" - pushd "${4:-"$QHTCP_RESULTS_DIR"}" && "${java_cmd[@]}" && popd || return 1 - [[ -f ${5:-$QHTCP_RESULTS_DIR/REMcRdy_lm_only.csv-finalTable.csv} ]] + pushd "${1:-$QHTCP_RESULTS_DIR}" && "${java_cmd[@]}" && popd || return 1 + [[ -f $output_file ]] } wrapper r_add_shift_values +# shellcheck disable=SC2120 # @description Add shift values back to REMcRdy_lm_only.csv-finalTable.csv # and output "REMcWithShift.csv" for use with the REMc heat maps +# # @arg $1 string 
REMcRdy_lm_only.csv-finalTable.csv # @arg $2 string Shift_only.csv -# @arg $3 string StudyInfo.csv file -# @arg $4 string The sd value +# @arg $3 string study info file +# @arg $4 string REMcWithShift.csv r_add_shift_values() { debug "Running: ${FUNCNAME[0]} $*" script="$APPS_DIR/r/addShiftVals.R" - debug "$RSCRIPT $script $*" - "$RSCRIPT" "$script" \ - "$1" \ - "$2" \ - "$3" \ - "$4" \ - "${@:5}" # optional arguments + + execute "$RSCRIPT" "$script" \ + "${1:-"$QHTCP_RESULTS_DIR/REMcRdy_lm_only.csv-finalTable.csv"}" \ + "${2:-"$QHTCP_RESULTS_DIR/Shift_only.csv"}" \ + "${3:-$STUDY_INFO_FILE}" \ + "${4:-"$QHTCP_RESULTS_DIR/REMcWithShift.csv"}" \ + "${@:5}" # future arguments + rm -f "$QHTCP_RESULTS_DIR/REMcHeatmaps/"*.pdf - out_file="$QHTCP_RESULTS_DIR/REMcWithShift.csv" + out_file="${4:-"$QHTCP_RESULTS_DIR/REMcWithShift.csv"}" [[ -f $out_file ]] || (echo "$out_file does not exist"; return 1) } wrapper r_create_heat_maps +# shellcheck disable=SC2120 # @description Execute createHeatMaps.R # +# INPUT +# +# * REMcWithShift.csv +# +# OUTPUT +# +# * compiledREMcHeatmaps.pdf +# # TODO +# # * Needs more looping for brevity # # @@ -1965,43 +1873,45 @@ wrapper r_create_heat_maps r_create_heat_maps() { debug "Running: ${FUNCNAME[0]} $*" script="$APPS_DIR/r/createHeatMaps.R" - debug "$RSCRIPT $script $*" - "$RSCRIPT" "$script" \ - "$1" \ - "$2" \ - "${@:3}" # optional arguments + + execute "$RSCRIPT" "$script" \ + "${1:-"$QHTCP_RESULTS_DIR/REMcWithShift.csv"}" \ + "${2:-"$QHTCP_RESULTS_DIR"}" \ + "${@:3}" # future arguments + pdfs=(REMcHeatmaps/*.pdf) - debug "pdftk ${pdfs[*]} output $out_file" - pdftk "${pdfs[@]}" output "$out_file" + execute pdftk "${pdfs[@]}" output "$out_file" out_file="$2/compiledREMcHeatmaps.pdf" [[ -f $out_file ]] || (echo "$out_file does not exist"; return 1) } wrapper r_heat_maps_homology -# @description Execute createHeatMapsAll.R -# @arg $1 string REMcRdy_lm_only.csv-finalTable.csv -# @arg $2 string Shift_only.csv -# @arg $3 string The 
(Yeast_Human_Homology_Mapping_biomaRt_18_0920.csv) -# @arg $4 string The output directory +# shellcheck disable=SC2120 +# @description Execute createHeatMapsHomology.R +# +# @arg $1 string output directory +# @arg $2 string REMcRdy_lm_only.csv-finalTable.csv +# @arg $3 string 170503_DAmPs_Only.txt +# @arg $4 string (Yeast_Human_Homology_Mapping_biomaRt_18_0920.csv) r_heat_maps_homology() { debug "Running: ${FUNCNAME[0]} $*" script="$APPS_DIR/r/createHeatMapsHomology.R" + + out_file="${1:-"$QHTCP_RESULTS_DIR/homology"}/compiledREMcHomologyHeatmaps.pdf" - # Remove old output - debug "Removing old pdfs and csvs from $4" - rm "$4/"*.{pdf,csv} + debug "Removing old pdf and csv files from ${1:-"$QHTCP_RESULTS_DIR/homology"}" + rm "${1:-"$QHTCP_RESULTS_DIR/homology"}/"*.{pdf,csv} - "$RSCRIPT" "$script" \ - "$1" \ - "$2" \ - "$3" \ - "$4" \ - "${@:5}" # optional arguments + execute "$RSCRIPT" "$script" \ + "${1:-"$QHTCP_RESULTS_DIR/homology"}" \ + "${2:-"$QHTCP_RESULTS_DIR/REMcRdy_lm_only.csv-finalTable.csv"}" \ + "${3:-"$APPS_DIR/r/170503_DAmPs_Only.txt"}" \ + "${4:-"$APPS_DIR/r/Yeast_Human_Homology_Mapping_biomaRt_18_0920.csv"}" \ + "${@:5}" # future arguments - pdfs=("$work_dir"/homology/*.pdf) - pdftk "${pdfs[@]}" output "$out_file" - out_file="$4/compiledREMcHomologyHeatmaps.pdf" + pdfs=("${1:-"$QHTCP_RESULTS_DIR/homology"}"/*.pdf) + execute pdftk "${pdfs[@]}" output "$out_file" [[ -f $out_file ]] || (echo "$out_file does not exist"; return 1) } @@ -2009,19 +1919,22 @@ r_heat_maps_homology() { wrapper py_gtf_dcon # @description Perform python dcon portion of GTF # -# Output +# SCRIPT: [DconJG2.py](apps/python/DconJG2.py) +# +# OUTPUT +# +# * 1-0-0-finaltable.csv # -# * 1-0-0-finaltable.csv # @arg $1 string Directory to process # @arg $2 string Output directory name py_gtf_dcon() { debug "Running: ${FUNCNAME[0]} $*" script="$APPS_DIR/python/DconJG2.py" - debug "$PYTHON $script $1 $2/" - "$PYTHON" "$script" \ + + execute "$PYTHON" "$script" \ "$1" \ "$2/" \ - "${@:3}" # 
optional arguments + "${@:3}" # future arguments out_file="$2/1-0-0-finaltable.csv" [[ -f $out_file ]] || (echo "$out_file does not exist"; return 1) } @@ -2029,29 +1942,37 @@ py_gtf_dcon() { wrapper pl_gtf_analyze # @description Perl analyze wrapper -# This seems weird to me because we're just overwriting the same data for all set2 members -# https://metacpan.org/dist/GO-TermFinder/view/examples/analyze.pl -# Is there a reason you need a custom version and not the original from cpan? -# @arg $1 string gene_association.sgd -# @arg $2 string gene_ontology_edit.obo -# @arg $3 string ORF_List_Without_DAmPs.txt -# @arg $4 string TODO txt to anaylze? I'm not sure what this is called +# +# SCRIPT: [analyze_v2.pl](https://metacpan.org/dist/GO-TermFinder/view/examples/analyze.pl) +# +# TODO +# +# * Are we just overwriting the same data for all set2 members? +# * Why the custom version? +# +# @arg $1 string txt to analyze (required) +# @arg $2 string gene_association.sgd +# @arg $3 string gene_ontology_edit.obo +# @arg $4 string ORF_List_Without_DAmPs.txt pl_gtf_analyze() { debug "Running: ${FUNCNAME[0]} $*" script="$APPS_DIR/perl/analyze_v2.pl" - debug "$PERL $script $*" - "$PERL" "$script" \ - "$1" \ - "$2" \ - "$3" \ - "$4" \ - "${@:5}" # optional arguments + + execute "$PERL" "$script" \ + "-an" "${2:-"$APPS_DIR/r/gene_association.sgd"}" \ + "-as" "P" \ + "-o" "${3:-"$APPS_DIR/r/gene_ontology_edit.obo"}" \ + "-b" "${4:-"$APPS_DIR/r/ORF_List_Without_DAmPs.txt"}" \ + "$1" } wrapper pl_gtf_terms2tsv # @description Perl terms2tsv wrapper -# Probably should be translated to shell/python +# +# TODO +# +# * Probably should be translated to shell/python # # @arg $1 string Terms file TODO naming pl_gtf_terms2tsv() { @@ -2065,14 +1986,17 @@ pl_gtf_terms2tsv() { wrapper py_gtf_concat # @description Python concat wrapper for GTF # Concat the process ontology outputs from the /REMcReady_lm_only folder -# Probably should be translated to bash +# +# TODO +# +# * Probably should be 
translated to bash +# # @arg $1 string output directory name to look for txt files # @arg $2 string output file py_gtf_concat() { debug "Running: ${FUNCNAME[0]} $*" script="$APPS_DIR/python/concatGTFResults.py" - debug "$PYTHON $script $1/ $2" - "$PYTHON" "$script" "$1/" "$2" + execute "$PYTHON" "$script" "$1/" "$2" [[ -f $2 ]] || (echo "$2 does not exist"; return 1) } @@ -2083,39 +2007,135 @@ wrapper r_compile_gtf r_compile_gtf() { debug "Running: ${FUNCNAME[0]} $*" script="$APPS_DIR/r/CompileGTF.R" - debug "$RSCRIPT $script $1" - "$RSCRIPT" "$script" "$1" + execute "$RSCRIPT" "$script" "$1" } - -# @description Parse study names from StudyInfo.csv files +# @description Creates, modifies, and parses the study info file # # TODO # -# * This whole wrapper should eventually be either -# * Removed -# * Expanded into a file that stores all project/study settings (database) -# * I had to had a new line to the end of StudyInfo.csv, may break things? +# * Needs refactoring +# * Ended up combining a few functions into one +# # # @exitcode 0 If one or more studies found # @exitcode 1 If no studies found -# @set STUDIES_NUMS array Contains Exp numbers -# @arg $1 string Study info file -# -get_studies() { +# @set STUDIES_NUMS array contains Exp numbers +# @set STUDIES_DIRS array contains Exp directories +study_info() { debug "Running: ${FUNCNAME[0]}" - declare -ga STUDIES_NUMS=() + + # Only run this once per project + # in case we run multiple modules + (( SET_STUDIES )) && return 0 + declare -g SET_STUDIES=1 + + # Use initials from project or whoami? 
+ # Best I can do is first two letters of username + # See TODO in markdown + initials="${USER:0:2}" + INITIALS=${initials^^} + + empty_study=1 + # Find an Exp directory that does not exist + while [[ -d $QHTCP_RESULTS_DIR/Exp$empty_study ]]; do + (( empty_study++ )) + done + + next_study_entry="$empty_study,$PROJECT_SUFFIX,NA,NA,$INITIALS" + + echo "${underline}Study Info File${nounderline}" + + if [[ -f $STUDY_INFO_FILE ]]; then + # Get latest entry + while IFS=',' read -r col1 _; do # split on comma, get Exp # from 1st column + studies_nums+=("$col1") + done < <(tail -n +2 "$STUDY_INFO_FILE") + largest=${studies_nums[0]} + for i in "${studies_nums[@]}"; do + if ((i > largest)); then + largest=$i + fi + done + empty_study=$((largest+1)) + next_study_entry="$((empty_study)),$PROJECT_SUFFIX,NA,NA,$INITIALS" + else # create a default study info file + echo "ExpNumb,ExpLabel,BackgroundSD,ZscoreJoinSD,AnalysisBy" > "$STUDY_INFO_FILE" + echo "$next_study_entry" >> "$STUDY_INFO_FILE" + next_study_entry="$((empty_study+1)),$PROJECT_SUFFIX,NA,NA,$INITIALS" + fi + + # Print current studies + cat <<-EOF + * Give each experiment labels to be used for the plots and specific files. + * Enter the desired experiment names in the order they should appear in the REMc heatmaps + + Current study info file contents: + + ${underline}$STUDY_INFO_FILE${nounderline} + $(cat "$STUDY_INFO_FILE") + + EOF + + # Allow user to add/edit the study info file + if ! 
((YES)); then + for ((i=1; i<2; i++)); do + cat <<-EOF + Next entry suggestion: "$next_study_entry" + + Would you like to: + * (a)dd the suggested entry + * (e)dit the study info file manually + * (c)ontinue (default) + EOF + read -r -p "(c): " response + echo "" + [[ -z $response ]] && break + case $response in + a) + echo "Adding auto-entry suggestion to $STUDY_INFO_FILE" + echo "$next_study_entry" >> "$STUDY_INFO_FILE" + next_study_entry="$((empty_study+1)),$PROJECT_SUFFIX,NA,NA,$INITIALS" + i=0 + ;; + e) + debug "${EDITOR:-nano} $STUDY_INFO_FILE" + ${EDITOR:-nano} "$STUDY_INFO_FILE" + ;; + c) + break + ;; + *) + err "Invalid response, please try again" + i=0 + ;; + esac + break + done + fi # Read study info file while IFS=',' read -r col1 _; do # split on comma, get Exp # from 1st column STUDIES_NUMS+=("$col1") - done < <(tail -n +2 "$1") # skip header + done < <(tail -n +2 "$STUDY_INFO_FILE") # skip header + # Initialize missing Exp dirs + STUDIES_DIRS=() + for s in "${STUDIES_NUMS[@]}"; do + study_dir="$QHTCP_RESULTS_DIR/Exp$s" + STUDIES_DIRS+=("$study_dir") + [[ -d $study_dir ]] || mkdir "$study_dir" + + # We don't need a template anymore? + # if ! 
rsync --archive "$STUDY_TEMPLATE_DIR" "$STUDY_DIR"; then + # err "Could not copy $STUDY_TEMPLATE_DIR template to $STUDY_DIR" + # continue + # fi + done + + # Return true if at least one study was found [[ ${#STUDIES_NUMS[@]} -gt 0 ]] - - - unset STUDY_DIR } @@ -2132,6 +2152,9 @@ choose_easy_results() { declare -a easy_results_dirs=( "$1/"*/ ) shopt -u nullglob + # Strip trailing slash + easy_results_dirs=("${easy_results_dirs[@]%/}") + num=${#easy_results_dirs[@]} if [[ $num -eq 0 ]]; then @@ -2182,9 +2205,7 @@ generate_markdown() { # @description The main loop of qhtcp-workflow -# May eventually need to add git ops -# Passes on arguments -# Most variables in main() are user configurable or can be overriden by env +# # @internal main() { debug "Running: ${FUNCNAME[0]} $*" @@ -2321,6 +2342,8 @@ main() { declare -gx STUDY_INFO_FILE="$QHTCP_RESULTS_DIR/StudyInfo.csv" declare -gx EASY_OUT_DIR="$QHTCP_RESULTS_DIR/easy" declare -gx EASY_RESULTS_DIR="$EASY_OUT_DIR/$PROJECT_PREFIX" + declare -gx GTA_OUT_DIR="$QHTCP_RESULTS_DIR/gta" + declare -gx GTF_OUT_DIR="$QHTCP_RESULTS_DIR/gtf" declare -gx R_LIBS_USER=${R_LIBS_USER:-"$HOME/R/$SCRIPT_NAME"} if ((DEBUG)); then echo "Debug:" @@ -2336,17 +2359,19 @@ main() { # Run selected modules for m in "${MODULES[@]}"; do - ask "Run $m module?" && "$m" + if ask "Run $m module?"; then + "$m" || return 1 + fi done # Run selected wrappers for i in "${!WRAPPERS[@]}"; do - IFS=',' read -ra cmds <<< "${WRAPPERS[$((i+1))]}" # load the command args - ask "Run ${WRAPPERS[i]} wrapper with args ${cmds[*]}?" 
&& - "${WRAPPERS[i]}" "${cmds[@]}" + IFS=',' read -ra args <<< "${WRAPPERS[$((i+1))]}" # load the command args + if ask "Run ${WRAPPERS[i]} wrapper with args ${args[*]}?"; then + "${WRAPPERS[i]}" "${args[@]}" || return 1 + fi continue 2 # skip the command string done - done cat <<-EOF @@ -2354,7 +2379,7 @@ main() { And wrapper(s): ${WRAPPERS[*]} On project(s): ${PROJECTS[*]} EOF - unset PROJECTS MODULES WRAPPERS EXCLUDE_MODULES + unset MODULES WRAPPERS EXCLUDE_MODULES STUDIES_NUMS STUDIES_DIRS SET_STUDIES YES } # (Safe) main loop