From b5aaf9ffb4b9dc59e3054f1fa17ef07cf3f39710 Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Fri, 2 Aug 2024 18:48:09 -0400 Subject: [PATCH] Some more refactoring and cleanup --- workflow/qhtcp-workflow | 95 +- workflow/templates/exp/Notes | 14 - workflow/templates/exp/StudyInfo.csv | 4 - workflow/templates/exp/ZScores/.DS_Store | Bin 6148 -> 0 bytes .../InteractTemplateB4Prompt4SDinput.R | 2702 ----------------- 5 files changed, 45 insertions(+), 2770 deletions(-) delete mode 100644 workflow/templates/exp/Notes delete mode 100644 workflow/templates/exp/StudyInfo.csv delete mode 100644 workflow/templates/exp/ZScores/.DS_Store delete mode 100644 workflow/templates/exp/backups/InteractTemplateB4Prompt4SDinput.R diff --git a/workflow/qhtcp-workflow b/workflow/qhtcp-workflow index b8068300..6cde3ffc 100755 --- a/workflow/qhtcp-workflow +++ b/workflow/qhtcp-workflow @@ -74,7 +74,7 @@ print_help() { script-run-workflow [[OPTION] [VALUE]]... Some options (--project, --include, --exclude) can be passed multiple times or - by using comma deliminated strings (see EXAMPLES below) + by using comma-separated strings (see EXAMPLES below) OPTIONS: --project, -p PROJECT @@ -117,6 +117,7 @@ print_help() { script-run-workflow --module=${ALL_MODULES[0]},${ALL_MODULES[1]} script-run-workflow --project=${PROJECT_PREFIX}_MY_PROJECT,${PROJECT_PREFIX}_MY_OTHER_PROJECT script-run-workflow --project=${PROJECT_PREFIX}_MY_PROJECT,${PROJECT_PREFIX}_MY_OTHER_PROJECT --module=${ALL_MODULES[1]},${ALL_MODULES[2]} --yes --debug + script-run-workflow --project=${PROJECT_PREFIX}_MY_PROJECT --submodule ${ALL_SUBMODULES[2]} \"/path/to/genefile.txt,/path/to/output/dir\" --submodule ${ALL_SUBMODULES[3]} \"/path/to/sgofile\" EOF } @@ -326,7 +327,7 @@ print_header() { # Let user choose project(s) if [[ -z ${PROJECTS[*]} ]]; then num=$((${#projects[@]})) - echo "Enter comma delimited project #'s (from list) to analyze" + echo "Enter a comma-separated list of project numbers to analyze" read -r -p "Or hit Enter to add a new project" response [[ -z $response ]] && ask_pn && PROJECTS+=("${ADD_PROJECTS[@]}") ((YES)) || read -r -p "Hit enter for default ($num): " response @@ -346,7 +347,7 @@ print_header() { done if [[ -z ${MODULES[*]} && -z ${EXCLUDE_MODULES[*]} ]]; then - echo "Enter module #'s to run (by #, comma delimited)" + echo "Enter a comma-separated list of modules to run" ((YES)) || read -r -p "Hit Enter for all (default) or '0' for none: " response if [[ -n $response && $response -ne 0 ]]; then IFS=',' read -ra arr <<< "$response" @@ -359,7 +360,8 @@ print_header() { if [[ -z ${MODULES[*]} && -z ${EXCLUDE_MODULES[*]} && -z ${SUBMODULES[*]} ]]; then while :; do - echo "Enter a submodule followed by its arguments as a case delimited string in quotes" + echo "Enter a submodule followed by its arguments as a comma-separated string" + echo "Quote your string if there are any whitespaces" echo "Example: ${ALL_SUBMODULES[0]} \"arg1,arg2,arg3...\"" ((YES)) || read -r -p "Or hit Enter to continue: " response [[ -z $response ]] && break @@ -520,6 +522,7 @@ init_project() { if ask "You can edit this file in the qhtcp module"; then cat <<-EOF > "$STUDY_INFO_FILE" "ExpNumb","ExpLabel","BackgroundSD","ZscoreJoinSD","AnalysisBy" + EOF fi fi @@ -943,15 +946,10 @@ qhtcp() { module remc -# @section GTF -# @description GTF module for QHTCP -# TODO which components of remc can be parallelized? -# The submodules in remc really like to be run from the REMc dir -# so we pop in and out for now -# NOTE the remc modules could use some love -# * Don't cd within scripts, it's confusing -# * Use arguments to pass configuration variables -# * This allows us to abstract the program away in script-run-workflow and treat it like a module +# @section remc +# @description remc module for QHTCP +# TODO +# * Which components can be parallelized? # @arg $1 string studyInfo file remc() { debug "Running: ${FUNCNAME[0]} $*" @@ -992,14 +990,14 @@ module gtf gtf() { debug "Running: ${FUNCNAME[0]}" gtf_out_dir="${1:-$QHTCP_PROJECT_DIR/out/gtf}" + gene_association_sgd="${2:-"$APPS_DIR/r/gene_association.sgd"}" + gene_ontology_obo="${3:-"$APPS_DIR/r/gene_ontology_edit.obo"}" + orf_list="${4:-"$APPS_DIR/r/ORF_List_Without_DAmPs.txt"}" + process_dir="$gtf_out_dir/process" function_dir="$gtf_out_dir/function" component_dir="$gtf_out_dir/component" - gene_association_sgd="${2:-"$APPS_DIR/perl/gene_association.sgd"}" - gene_ontology_obo="${3:-"$APPS_DIR/perl/gene_ontology_edit.obo"}" - orf_list="${4:-"$APPS_DIR/perl/ORF_List_Without_DAmPs.txt"}" - py_gtf_dcon \ "$process_dir" \ "$gtf_out_dir" @@ -1040,26 +1038,26 @@ module gta # TODO # * # * -# @set GTA_OUT_DIR string The GTA output results dir -# @set all_sgd_terms_csv string The all_SGD_GOTerms_for_QHTCPtk.csv file -# @set sgd_terms_tfile string The go_terms.tab file -# @set sgd_features_file string The gene_association.sgd file -# @set gene_ontology_file string The gene_ontology_edit.obo file -# @set zscores_file string The ZScores_interaction.csv file +# @arg $1 string output directory +# @arg $2 string gene_association.sgd +# @arg $3 string gene_ontology_edit.obo +# @arg $4 string go_terms.tab +# @arg $5 string All_SGD_GOTerms_for_QHTCPtk.csv +# @arg $6 string zscores_interaction.csv gta() { debug "Running: ${FUNCNAME[0]}" - GTA_OUT_DIR="$QHTCP_PROJECT_DIR/gta" - all_sgd_terms_csv="$APPS_DIR/r/All_SGD_GOTerms_for_QHTCPtk.csv" - sgd_terms_tfile="$APPS_DIR/r/go_terms.tab" - sgd_features_file="$APPS_DIR/r/gene_association.sgd" - gene_ontology_file="$APPS_DIR/r/gene_ontology_edit.obo" - zscores_file="zscores/zscores_interaction.csv" - + gta_out_dir="${1:-"$QHTCP_PROJECT_DIR/gta"}" + gene_association_sgd="${2:-"$APPS_DIR/r/gene_association.sgd"}" + gene_ontology_obo="${3:-"$APPS_DIR/r/gene_ontology_edit.obo"}" + sgd_terms_tfile="${4:-"$APPS_DIR/r/go_terms.tab"}" + all_sgd_terms_csv="${5:-"$APPS_DIR/r/All_SGD_GOTerms_for_QHTCPtk.csv"}" + zscores_file="${6:-"$gta_out_dir/zscores/zscores_interaction.csv"}" # TODO This could be wrong, it could be in main results + # Sets STUDIES_NUM and NUM_STUDIES get_studies "$STUDY_INFO_FILE" - [[ -d $GTA_OUT_DIR ]] || mkdir "$GTA_OUT_DIR" + [[ -d $gta_out_dir ]] || mkdir "$gta_out_dir" # Loop over the array and create pairwise arrays for ((i=0; i<${#STUDIES_NUMS[@]}; i++)); do @@ -1088,13 +1086,13 @@ gta() { for s in "${STUDIES_NUMS[@]}"; do zscores_file="$QHTCP_PROJECT_DIR/Exp$s/$zscores_file" if [[ -f $zscores_file ]]; then - mkdir "$GTA_OUT_DIR/Exp$s" + mkdir "$gta_out_dir/Exp$s" r_gta \ "Exp$s" \ "$zscores_file" \ "$sgd_terms_tfile" \ - "$sgd_features_file" \ - "$GTA_OUT_DIR" + "$gene_association_sgd" \ + "$gta_out_dir" fi done @@ -1102,7 +1100,7 @@ gta() { for combo in "${study_combos[@]}"; do # Split on comma and assign to array IFS=',' read -ra studies <<< "$combo" - r_gta_pairwiselk "${studies[0]}" "${studies[1]}" "$STUDY_INFO_FILE" "$GTA_OUT_DIR" + r_gta_pairwiselk "${studies[0]}" "${studies[1]}" "$STUDY_INFO_FILE" "$gta_out_dir" done # All studies @@ -1110,7 +1108,7 @@ gta() { # are required r_gta_heatmaps \ "$STUDY_INFO_FILE" \ - "$gene_ontology_file" \ + "$gene_ontology_obo" \ "$sgd_terms_tfile" \ "$all_sgd_terms_csv" \ "$zscores_file" \ @@ -1121,14 +1119,12 @@ gta() { # @section Submodules -# @description Submodules provide functionality to modules and should be reusable -# A submodule only runs by default if called by a module -# Use a submodule for: -# * Calling external scripts -# * Performing repetitive tasks -# * Generalizing code -# * Functions you do not want to perform by default (submodules should be called modules) -# * Should not call cd or pushd (let module dictate) +# @description Submodules are shell wrappers for workflow components in external languages. +# Submodules: +# * Allow scripts to be called by the main workflow script using input\ +# and output arguments as a translation mechanism. +# * Only run by default if called by a module. +# * Can be called directly with its arguments as a comma-separated string submodule r_gta @@ -1320,6 +1316,10 @@ r_interactions() { submodule r_join_interactions # @description JoinInteractExps3dev.R creates REMcRdy_lm_only.csv and Shift_only.csv +# Output files: +# * REMcRdy_lm_only.csv +# * Shift_only.csv +# * parameters.csv # @arg $1 string The output directory # @arg $2 string The sd value # @arg $3 string The studyInfo file @@ -1552,19 +1552,15 @@ get_studies() { # submodule choose_easy_results_dir # # # @description Chooses an EASY scans directory if the information is undefined -# # TODO: Standardize EASY output, it's hard to understand +# # TODO Standardize EASY output, it's hard to understand # # TODO eventually we could run this on multiple results dirs simultaneously with some refactoring # # @exitcode 0 if successfully choose an EASY results dir # # @set EASY_RESULTS_DIR string The working EASY output directory # choose_easy_results_dir() { # debug "Running: ${FUNCNAME[0]}" - - - # # Always backup existing output # # This would happen if you ran the same experiment twice in one day, for instance # [[ -d $EASY_RESULTS_DIR ]] && backup "$EASY_RESULTS_DIR" - # if [[ ! -d $EASY_RESULTS_DIR ]]; then # debug "mkdir $EASY_RESULTS_DIR" # mkdir "$EASY_RESULTS_DIR" @@ -1572,7 +1568,6 @@ get_studies() { # err "Could not create $EASY_RESULTS_DIR" # return 0 # fi - # # echo "Hit enter to use the default EASY results directory: $default_easy_results_dir" # # if ! (( YES )); then # # read -r -p "Or enter a custom directory name, example: $PROJECT" dirname diff --git a/workflow/templates/exp/Notes b/workflow/templates/exp/Notes deleted file mode 100644 index 1f671bc1..00000000 --- a/workflow/templates/exp/Notes +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright 2024 Bryan C. Roessler -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - diff --git a/workflow/templates/exp/StudyInfo.csv b/workflow/templates/exp/StudyInfo.csv deleted file mode 100644 index 70226305..00000000 --- a/workflow/templates/exp/StudyInfo.csv +++ /dev/null @@ -1,4 +0,0 @@ -ExpNumb,ExpLabel,BackgroundSD,ZscoreJoinSD,AnalysisBy -1,ExpName1,NA,NA,UserInitials -2,ExpName2,NA,NA,UserInitials -3,ExpName3,NA,NA,UserInitials diff --git a/workflow/templates/exp/ZScores/.DS_Store b/workflow/templates/exp/ZScores/.DS_Store deleted file mode 100644 index 6ff7ba4b5bd1058198a5917e10a8765f7ba49de7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeI0!AiqG5QhJWJyb>LL8J!-eF32lAeb7jg7nr)BuTA8LP~=@dlx)<7N5k6Ctt#M z@C9UMHn6twVm%Z)AB0JE_n*yd%Fa#!Q2#FL0UZEss0}vJ2>0`~K;T-o0Fd|?qC$!i zQ;Zfy8>fk9&i( zEGDu#(Q&$rucC~iBrg;G#qK`8?7Sbnh0FQN{?y;(#*g)tLk@6>Gk+na!C#c%;p3?u z=m*&S9G>hQNPwa#yqBHt^Cu1-b*&AmWY8 z=yR*~pml)M11#ldma=6ji0{N|^*~NauEZcINqC&dK`IBS9OO1Fa>WO^naNcs#1#$e zRSuHb4a1mcLmN7r|IhJb{cjk?KZaV-AnHrfZH99#=hG|K%{kaV2rvZA= Delta_Background_Tolerance,] - -X_Delta_Backgrd_above_Tolerance_K_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$K,na.rm = TRUE))/2 -X_Delta_Backgrd_above_Tolerance_L_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$l,na.rm = TRUE))/2 -X_Delta_Backgrd_above_Tolerance_toRemove <- dim(X_Delta_Backgrd_above_Tolerance)[1] - -X_Delta_Backgrd_above_Tolerance_L_vs_K <- ggplot(X_Delta_Backgrd_above_Tolerance,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + - ggtitle(paste("Raw L vs K for strains above delta background threshold of",Delta_Background_Tolerance,"or above")) + - annotate("text",x=X_Delta_Backgrd_above_Tolerance_L_halfmedian,y=X_Delta_Backgrd_above_Tolerance_K_halfmedian, - label = paste("Strains above delta background tolerance = ",X_Delta_Backgrd_above_Tolerance_toRemove)) + - theme_Publication_legend_right() -pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.pdf",sep=""),width = 12,height = 8) -X_Delta_Backgrd_above_Tolerance_L_vs_K -dev.off() -pgg <- ggplotly(X_Delta_Backgrd_above_Tolerance_L_vs_K) -#pgg -plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.html",sep="") -saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - -#frequency plot for all data vs. the delta_background -DeltaBackground_Frequency_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_density() + - ggtitle("Density plot for Delta Background by Conc All Data") + theme_Publication_legend_right() - -#bar plot for all data vs. the delta_background -DeltaBackground_Bar_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_bar() + - ggtitle("Bar plot for Delta Background by Conc All Data") + theme_Publication_legend_right() - -pdf(file = paste(outputpath_QC,"Frequency_Delta_Background.pdf",sep=""),width = 12,height = 8) -print(DeltaBackground_Frequency_Plot) -print(DeltaBackground_Bar_Plot) -dev.off() - - -#Need to identify missing data, and differentiate between this data and removed data so the removed data can get set to NA and the missing data can get set to max theoretical values -#1 for missing data, 0 for non missing data -#Use "NG" for NoGrowth rather than "missing" -X$NG <- 0 -try(X[X$l == 0 & !is.na(X$l),]$NG <- 1) - -#1 for removed data, 0 non removed data -#Use DB to identify number of genes removed due to the DeltaBackground Threshold rather than "Removed" -X$DB <- 0 -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$DB <- 1) - -#replace the CPPs for l, r, AUC and K (must be last!) for removed data -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$l <- NA) -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$r <- NA) -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$AUC <- NA) -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$K <- NA) - - -Plate_Analysis_L_afterQC <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_afterQC <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_afterQC <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_afterQC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_afterQC <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - - -Plate_Analysis_L_Box_afterQC <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_Box_afterQC <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_Box_afterQC <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_Box_afterQC <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_Box_afterQC <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis.pdf",sep=""),width = 14,height=9) -Plate_Analysis_L -Plate_Analysis_L_afterQC -Plate_Analysis_K -Plate_Analysis_K_afterQC -Plate_Analysis_r -Plate_Analysis_r_afterQC -Plate_Analysis_AUC -Plate_Analysis_AUC_afterQC -Plate_Analysis_Delta_Backgrd -Plate_Analysis_Delta_Backgrd_afterQC -dev.off() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis_Boxplots.pdf",sep=""),width = 18,height=9) -Plate_Analysis_L_Box -Plate_Analysis_L_Box_afterQC -Plate_Analysis_K_Box -Plate_Analysis_K_Box_afterQC -Plate_Analysis_r_Box -Plate_Analysis_r_Box_afterQC -Plate_Analysis_AUC_Box -Plate_Analysis_AUC_Box_afterQC -Plate_Analysis_Delta_Backgrd_Box -Plate_Analysis_Delta_Backgrd_Box_afterQC -dev.off() - -#remove the zero values and print plate analysis -X_noZero <- X[which(X$l > 0),] -Plate_Analysis_L_afterQC_Z <- ggplot(X_noZero,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_afterQC_Z <- ggplot(X_noZero,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_afterQC_Z <- ggplot(X_noZero,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_afterQC_Z <- ggplot(X_noZero,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_afterQC_Z <- ggplot(X_noZero,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - - -Plate_Analysis_L_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros.pdf",sep=""),width = 14,height=9) -Plate_Analysis_L_afterQC_Z -Plate_Analysis_K_afterQC_Z -Plate_Analysis_r_afterQC_Z -Plate_Analysis_AUC_afterQC_Z -Plate_Analysis_Delta_Backgrd_afterQC_Z -dev.off() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros_Boxplots.pdf",sep=""),width = 18,height=9) -Plate_Analysis_L_Box_afterQC_Z -Plate_Analysis_K_Box_afterQC_Z -Plate_Analysis_r_Box_afterQC_Z -Plate_Analysis_AUC_Box_afterQC_Z -Plate_Analysis_Delta_Backgrd_Box_afterQC_Z -dev.off() - -#remove dataset with zeros removed -rm(X_noZero) - - -#X_test_missing_and_removed <- X[X$Removed == 1,] - -#calculate summary statistics for all strains, including both background and the deletions -X_stats_ALL <- ddply(X, c("Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_AUC = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - se_AUC = sd_AUC / sqrt(N-1) -) -#print(X_stats_ALL_L) - -write.csv(X_stats_ALL,file=paste(outputpath,"SummaryStats_ALLSTRAINS.csv"),row.names = FALSE) -#+++++END QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -##### Part 3 - Generate summary statistics and calculate the max theoretical L value -##### Calculate the Z score at each drug conc for each deletion strain - - -#get the background strains - can be modified to take another argument but for most screens will just be YDL227C -Background_Strains <- c("YDL227C") - -#first part of loop will go through for each background strain -#most cases there will only be one YDL227C -for(s in Background_Strains){ - X_Background <- X[X$OrfRep == s,] - - #if there's missing data for the background strains set these values to NA so the 0 values aren't included in summary statistics - #we may want to consider in some cases giving the max high value to L depending on the data type - if(table(X_Background$l)[1] == 0){ - X_Background[X_Background$l == 0,]$l <- NA - X_Background[X_Background$K == 0,]$K <- NA - X_Background[X_Background$r == 0,]$r <- NA - X_Background[X_Background$AUC == 0,]$AUC <- NA - } - - X_Background <- X_Background[!is.na(X_Background$l),] - - #get summary stats for L, K, R, AUC - X_stats_BY_L <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean = mean(l,na.rm=TRUE), - median = median(l,na.rm=TRUE), - max = max(l,na.rm=TRUE), - min = min(l,na.rm=TRUE), - sd = sd(l,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - print(X_stats_BY_L) - X1_SD <- max(X_stats_BY_L$sd) - - X_stats_BY_K <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(K)), - mean = mean(K,na.rm=TRUE), - median = median(K,na.rm=TRUE), - max = max(K,na.rm=TRUE), - min = min(K,na.rm=TRUE), - sd = sd(K,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - - X1_SD_K <- max(X_stats_BY_K$sd) - - - X_stats_BY_r <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = length(r), - mean = mean(r,na.rm=TRUE), - median = median(r,na.rm=TRUE), - max = max(r,na.rm=TRUE), - min = min(r,na.rm=TRUE), - sd = sd(r,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - - X1_SD_r <- max(X_stats_BY_r$sd) - - X_stats_BY_AUC <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = length(AUC), - mean = mean(AUC,na.rm=TRUE), - median = median(AUC,na.rm=TRUE), - max = max(AUC,na.rm=TRUE), - min = min(AUC,na.rm=TRUE), - sd = sd(AUC,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - - X1_SD_AUC <- max(X_stats_BY_AUC$sd) - - X_stats_BY <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_L = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - se_AUC = sd_AUC / sqrt(N-1) - ) - - write.csv(X_stats_BY,file=paste(outputpath,"SummaryStats_BackgroundStrains.csv"),row.names=FALSE) - - #calculate the max theoretical L values - #only look for max values when K is within 2SD of the ref strain - for(q in unique(X$Conc_Num_Factor)){ - if(q == 0){ - X_within_2SD_K <- X[X$Conc_Num_Factor == q,] - X_within_2SD_K <- X_within_2SD_K[!is.na(X_within_2SD_K$l),] - X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) ,] - X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - - X_outside_2SD_K <- X[X$Conc_Num_Factor == q,] - X_outside_2SD_K <- X_outside_2SD_K[!is.na(X_outside_2SD_K$l),] - #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - #X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - - } - if(q > 0){ - X_within_2SD_K_temp <- X[X$Conc_Num_Factor == q,] - X_within_2SD_K_temp <- X_within_2SD_K_temp[!is.na(X_within_2SD_K_temp$l),] - X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])),] - X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])),] - X_within_2SD_K <- rbind(X_within_2SD_K,X_within_2SD_K_temp) - - X_outside_2SD_K_temp <- X[X$Conc_Num_Factor == q,] - X_outside_2SD_K_temp <- X_outside_2SD_K_temp[!is.na(X_outside_2SD_K_temp$l),] - #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - #X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - X_outside_2SD_K <- rbind(X_outside_2SD_K,X_outside_2SD_K_temp) - } - } - - X_stats_BY_L_within_2SD_K <- ddply(X_within_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean = mean(l), - median = median(l), - max = max(l,na.rm=TRUE), - min = min(l,na.rm=TRUE), - sd = sd(l), - se = sd / sqrt(N-1), - z_max = (max-mean)/sd - ) - print(X_stats_BY_L_within_2SD_K) - X1_SD_within_2SD_K <- max(X_stats_BY_L_within_2SD_K$sd) - write.csv(X_stats_BY_L_within_2SD_K,file=paste(outputpath_QC,"Max_Observed_L_Vals_for_spots_within_2SD_K.csv",sep=""),row.names=FALSE) - - X_stats_BY_L_outside_2SD_K <- ddply(X_outside_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean = mean(l), - median = median(l), - max = max(l,na.rm=TRUE), - min = min(l,na.rm=TRUE), - sd = sd(l), - se = sd / sqrt(N-1) - ) - print(X_stats_BY_L_outside_2SD_K) - X1_SD_outside_2SD_K <- max(X_stats_BY_L_outside_2SD_K$sd) - - #X1_SD_outside_2SD_K <- X[X$l %in% X1_SD_within_2SD_K$l,] - Outside_2SD_K_L_vs_K <- ggplot(X_outside_2SD_K,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + - ggtitle("Raw L vs K for strains falling outside 2SD of the K mean at each conc") + theme_Publication_legend_right() - pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8) - print(Outside_2SD_K_L_vs_K) - dev.off() - pgg <- ggplotly(Outside_2SD_K_L_vs_K) - plotly_path <- paste(getwd(),"/",outputpath_QC,"RawL_vs_K_for_strains_outside_2SD_K.html",sep="") - saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - - Outside_2SD_K_delta_background_vs_K <- ggplot(X_outside_2SD_K,aes(Delta_Backgrd,K,color=as.factor(Conc_Num))) + geom_point(aes(l=l,ORF=ORF,Gene=Gene),shape=3,position="jitter") + - ggtitle("DeltaBackground vs K for strains falling outside 2SD of the K mean at each conc") + theme_Publication_legend_right() - pdf(paste(outputpath_QC,"DeltaBackground_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8) - Outside_2SD_K_delta_background_vs_K - dev.off() - pgg <- ggplotly(Outside_2SD_K_delta_background_vs_K) - #pgg - plotly_path <- paste(getwd(),"/",outputpath_QC,"DeltaBackground_vs_K_for_strains_outside_2SD_K.html",sep="") - saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - - #get the background strain mean values at the no drug conc to calculate shift - Background_L <- X_stats_BY_L$mean[1] - Background_K <- X_stats_BY_K$mean[1] - Background_r <- X_stats_BY_r$mean[1] - Background_AUC <- X_stats_BY_AUC$mean[1] - - #create empty plots for plotting element - p_l <- ggplot() - p_K <- ggplot() - p_r <- ggplot() - p_AUC <- ggplot() - - p_rf_l <- ggplot() - p_rf_K <- ggplot() - p_rf_r <- ggplot() - p_rf_AUC <- ggplot() - - #get only the deletion strains - X2 <- X - X2 <- X2[X2$OrfRep != "YDL227C",] - - #if set to max theoretical value, add a 1 to SM, if not, leave as 0 - #SM = Set to Max - X2$SM <- 0 - #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min - for(i in 1:length(unique(X2$Conc_Num))){ - Concentration <- unique(X2$Conc_Num)[i] - X2_temp <- X2[X2$Conc_Num == Concentration,] - if(Concentration == 0){ - X2_new <- X2_temp - print(paste("Check loop order, conc =",Concentration,sep=" ")) - } - if(Concentration > 0){ - try(X2_temp[X2_temp$l == 0 & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$SM <- 1) - try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - - #X2_temp[X2_temp$K == 0,]$K <- X_stats_ALL_K$max[i] - #X2_temp[X2_temp$r == 0,]$r <- X_stats_ALL_r$max[i] - #X2_temp[X2_temp$AUC == 0,]$AUC <- X_stats_ALL_AUC$max[i] - print(paste("Check loop order, conc =",Concentration,sep=" ")) - - X2_new <- rbind(X2_new,X2_temp) - - } - } - X2 <- X2_new - - - #get only the RF strains - X2_RF <- X - X2_RF <- X2_RF[X2_RF$OrfRep == "YDL227C",] - #if set to max theoretical value, add a 1 to SM, if not, leave as 0 - #SM = Set to Max - X2_RF$SM <- 0 - #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min - for(i in 1:length(unique(X2_RF$Conc_Num))){ - Concentration <- unique(X2_RF$Conc_Num)[i] - X2_RF_temp <- X2_RF[X2_RF$Conc_Num == Concentration,] - if(Concentration == 0){ - X2_RF_new <- X2_RF_temp - print(paste("Check loop order, conc =",Concentration,sep=" ")) - } - if(Concentration > 0){ - try(X2_RF_temp[X2_RF_temp$l == 0 & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$SM <- 1) - try(X2_RF_temp[X2_RF_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - print(paste("Check loop order, if error, refs have no L values outside theoretical max L, for REFs, conc =",Concentration,sep=" ")) - - X2_RF_new <- rbind(X2_RF_new,X2_RF_temp) - - } - } - X2_RF <- X2_RF_new - - - #######Part 4 Get the RF Z score values - #Change the OrfRep Column to include the RF strain, the Gene name and the Num. so each RF gets its own score - X2_RF$OrfRep <- paste(X2_RF$OrfRep,X2_RF$Gene,X2_RF$Num.,sep="_") - - num_genes_RF <- length(unique(X2_RF$OrfRep)) - print(num_genes_RF) - - - #create the output data.frame containing columns for each RF strain - InteractionScores_RF <- unique(X2_RF["OrfRep"]) - #InteractionScores_RF$Gene <- unique(X2$Gene) - InteractionScores_RF$Gene <- NA - InteractionScores_RF$Raw_Shift_L <- NA - InteractionScores_RF$Z_Shift_L <- NA - InteractionScores_RF$lm_Score_L <- NA - InteractionScores_RF$Z_lm_L <- NA - InteractionScores_RF$R_Squared_L <- NA - InteractionScores_RF$Sum_Z_Score_L <- NA - InteractionScores_RF$Avg_Zscore_L <- NA - InteractionScores_RF$Raw_Shift_K <- NA - InteractionScores_RF$Z_Shift_K <- NA - InteractionScores_RF$lm_Score_K <- NA - InteractionScores_RF$Z_lm_K <- NA - InteractionScores_RF$R_Squared_K <- NA - InteractionScores_RF$Sum_Z_Score_K <- NA - InteractionScores_RF$Avg_Zscore_K <- NA - InteractionScores_RF$Raw_Shift_r <- NA - InteractionScores_RF$Z_Shift_r <- NA - InteractionScores_RF$lm_Score_r <- NA - InteractionScores_RF$Z_lm_r <- NA - InteractionScores_RF$R_Squared_r <- NA - InteractionScores_RF$Sum_Z_Score_r <- NA - InteractionScores_RF$Avg_Zscore_r <- NA - InteractionScores_RF$Raw_Shift_AUC <- NA - InteractionScores_RF$Z_Shift_AUC <- NA - InteractionScores_RF$lm_Score_AUC <- NA - InteractionScores_RF$Z_lm_AUC <- NA - InteractionScores_RF$R_Squared_AUC <- NA - InteractionScores_RF$Sum_Z_Score_AUC <- NA - InteractionScores_RF$Avg_Zscore_AUC <- NA - InteractionScores_RF$NG <- NA - InteractionScores_RF$SM <- NA - - - for(i in 1:num_genes_RF){ - #get each deletion strain ORF - Gene_Sel <- unique(X2_RF$OrfRep)[i] - #extract only the current deletion strain and its data - X_Gene_Sel <- X2_RF[X2_RF$OrfRep == Gene_Sel,] - - X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm = TRUE), - median_L = median(l,na.rm = TRUE), - sd_L = sd(l,na.rm = TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm = TRUE), - median_K = median(K,na.rm = TRUE), - sd_K = sd(K,na.rm = TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm = TRUE), - median_r = median(r,na.rm = TRUE), - sd_r = sd(r,na.rm = TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm = TRUE), - median_AUC = median(AUC,na.rm = TRUE), - sd_AUC = sd(AUC,na.rm = TRUE), - se_AUC = sd_AUC / sqrt(N-1), - NG = sum(NG,na.rm=TRUE), - DB= sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - #Get shift vals - #if L is 0, that means the no growth on no drug - #if L is NA at 0, that means the spot was removed due to contamination - #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift - #otherwise calculate shift at no drug conc - if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){ - X_stats_interaction$Raw_Shift_L <- 0 - X_stats_interaction$Raw_Shift_K <- 0 - X_stats_interaction$Raw_Shift_r <- 0 - X_stats_interaction$Raw_Shift_AUC <- 0 - X_stats_interaction$Z_Shift_L <- 0 - X_stats_interaction$Z_Shift_K <- 0 - X_stats_interaction$Z_Shift_r <- 0 - X_stats_interaction$Z_Shift_AUC <- 0 - }else{ - X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L - X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K - X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r - X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC - X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1] - X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1] - X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1] - X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1] - } - - - #get WT vals - X_stats_interaction$WT_l <- X_stats_BY_L$mean - X_stats_interaction$WT_K <- X_stats_BY_K$mean - X_stats_interaction$WT_r <- X_stats_BY_r$mean - X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean - - #Get WT SD - X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd - X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd - X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd - X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd - - - #only get scores if there's growth at no drug - if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for no growth values in Z score calculation - if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #get linear model - gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction) - - #get interaction score calculated by linear model and R-squared value for the fit - gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] - r_squared_l <- summary(gene_lm_L)$r.squared - gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] - r_squared_K <- summary(gene_lm_K)$r.squared - gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] - r_squared_r <- summary(gene_lm_r)$r.squared - gene_interaction_AUC <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_AUC$coefficients[1] - r_squared_AUC <- summary(gene_lm_AUC)$r.squared - - #Get total of non removed values - Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1 - - #report the scores - InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] - InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] - - InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L - InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc) - - - InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K - InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc) - - InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r - InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1) - - InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC - InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1) - } - - if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for missing values in Z score calculatiom - if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #NA values for the next part since there's an NA or 0 at the no drug. - gene_lm_L <- NA - gene_lm_K <- NA - gene_lm_r <- NA - gene_lm_AUC <- NA - - gene_interaction_L <- NA - r_squared_l <- NA - gene_interaction_K <- NA - r_squared_K <- NA - gene_interaction_r <- NA - r_squared_r <- NA - gene_interaction_AUC <- NA - r_squared_AUC <- NA - - X_stats_interaction$Raw_Shift_L <- NA - X_stats_interaction$Raw_Shift_K <- NA - X_stats_interaction$Raw_Shift_r <- NA - X_stats_interaction$Raw_Shift_AUC <- NA - - X_stats_interaction$Z_Shift_L <- NA - X_stats_interaction$Z_Shift_K <- NA - X_stats_interaction$Z_Shift_r <- NA - X_stats_interaction$Z_Shift_AUC <- NA - - InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] - InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] - - InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L - InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - - InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K - InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - - InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r - InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - - InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC - InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - } - - if(i == 1){ - X_stats_interaction_ALL_RF <- X_stats_interaction - } - if(i > 1){ - X_stats_interaction_ALL_RF <- rbind(X_stats_interaction_ALL_RF,X_stats_interaction) - } - - InteractionScores_RF$NG[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE) - InteractionScores_RF$DB[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE) - InteractionScores_RF$SM[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE) - - #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int) - - - } - print("Pass RF Calculation loop") - - lm_sd_L <- sd(InteractionScores_RF$lm_Score_L,na.rm=TRUE) - lm_sd_K <- sd(InteractionScores_RF$lm_Score_K,na.rm=TRUE) - lm_sd_r <- sd(InteractionScores_RF$lm_Score_r,na.rm=TRUE) - lm_sd_AUC <- sd(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) - - lm_mean_L <- mean(InteractionScores_RF$lm_Score_L,na.rm=TRUE) - lm_mean_K <- mean(InteractionScores_RF$lm_Score_K,na.rm=TRUE) - lm_mean_r <- mean(InteractionScores_RF$lm_Score_r,na.rm=TRUE) - lm_mean_AUC <- mean(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) - - print(paste("Mean RF linear regression score L",lm_mean_L)) - - - InteractionScores_RF$Z_lm_L <- (InteractionScores_RF$lm_Score_L - lm_mean_L)/(lm_sd_L) - InteractionScores_RF$Z_lm_K <- (InteractionScores_RF$lm_Score_K - lm_mean_K)/(lm_sd_K) - InteractionScores_RF$Z_lm_r <- (InteractionScores_RF$lm_Score_r - lm_mean_r)/(lm_sd_r) - InteractionScores_RF$Z_lm_AUC <- (InteractionScores_RF$lm_Score_AUC - lm_mean_AUC)/(lm_sd_AUC) - - - InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$Z_lm_L,decreasing=TRUE),] - InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$NG,decreasing=TRUE),] - write.csv(InteractionScores_RF,paste(outputpath,"RF_ZScores_Interaction.csv",sep=""),row.names=FALSE) - - - for(i in 1:num_genes_RF){ - Gene_Sel <- unique(InteractionScores_RF$OrfRep)[i] - X_ZCalculations <- X_stats_interaction_ALL_RF[X_stats_interaction_ALL_RF$OrfRep == Gene_Sel,] - X_Int_Scores <- InteractionScores_RF[InteractionScores_RF$OrfRep == Gene_Sel,] - - p_rf_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg Zscore =",round(X_Int_Scores$Avg_Zscore_L,2))) + - annotate("text",x=1,y=25,label = paste("lm Zscore =",round(X_Int_Scores$Z_lm_L,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_rf_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) + - annotate("text",x=1,y=25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_L,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_rf_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-0.65,0.65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) + - annotate("text",x=1,y=0.25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_r,2))) + - annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) + - theme_Publication() - - p_rf_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-6500,6500)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) + - annotate("text",x=1,y=2500,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_AUC,2))) + - annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) + - theme_Publication() - - - - if(i == 1){ - X_stats_interaction_ALL_RF_final <- X_ZCalculations - } - if(i > 1){ - X_stats_interaction_ALL_RF_final <- rbind(X_stats_interaction_ALL_RF_final,X_ZCalculations) - } - } - print("Pass RF ggplot loop") - write.csv(X_stats_interaction_ALL_RF_final,paste(outputpath,"RF_ZScore_Calculations.csv",sep=""),row.names = FALSE) - - - ####### Part 5 - Get Zscores for Gene deletion strains - - - - #get total number of genes for the next loop - num_genes <- length(unique(X2$OrfRep)) - print(num_genes) - - #create the output data.frame containing columns for each deletion strain - InteractionScores <- unique(X2["OrfRep"]) - #InteractionScores$Gene <- unique(X2$Gene) - InteractionScores$Gene <- NA - InteractionScores$Raw_Shift_L <- NA - InteractionScores$Z_Shift_L <- NA - InteractionScores$lm_Score_L <- NA - InteractionScores$Z_lm_L <- NA - InteractionScores$R_Squared_L <- NA - InteractionScores$Sum_Z_Score_L <- NA - InteractionScores$Avg_Zscore_L <- NA - InteractionScores$Raw_Shift_K <- NA - InteractionScores$Z_Shift_K <- NA - InteractionScores$lm_Score_K <- NA - InteractionScores$Z_lm_K <- NA - InteractionScores$R_Squared_K <- NA - InteractionScores$Sum_Z_Score_K <- NA - InteractionScores$Avg_Zscore_K <- NA - InteractionScores$Raw_Shift_r <- NA - InteractionScores$Z_Shift_r <- NA - InteractionScores$lm_Score_r <- NA - InteractionScores$Z_lm_r <- NA - InteractionScores$R_Squared_r <- NA - InteractionScores$Sum_Z_Score_r <- NA - InteractionScores$Avg_Zscore_r <- NA - InteractionScores$Raw_Shift_AUC <- NA - InteractionScores$Z_Shift_AUC <- NA - InteractionScores$lm_Score_AUC <- NA - InteractionScores$Z_lm_AUC <- NA - InteractionScores$R_Squared_AUC <- NA - InteractionScores$Sum_Z_Score_AUC <- NA - InteractionScores$Avg_Zscore_AUC <- NA - InteractionScores$NG <- NA - InteractionScores$DB <- NA - InteractionScores$SM <- NA - - for(i in 1:num_genes){ - #get each deletion strain ORF - Gene_Sel <- unique(X2$OrfRep)[i] - #extract only the current deletion strain and its data - X_Gene_Sel <- X2[X2$OrfRep == Gene_Sel,] - - X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm = TRUE), - median_L = median(l,na.rm = TRUE), - sd_L = sd(l,na.rm = TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm = TRUE), - median_K = median(K,na.rm = TRUE), - sd_K = sd(K,na.rm = TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm = TRUE), - median_r = median(r,na.rm = TRUE), - sd_r = sd(r,na.rm = TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm = TRUE), - median_AUC = median(AUC,na.rm = TRUE), - sd_AUC = sd(AUC,na.rm = TRUE), - se_AUC = sd_AUC / sqrt(N-1), - NG = sum(NG,na.rm=TRUE), - DB= sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - #Get shift vals - #if L is 0, that means the no growth on no drug - #if L is NA at 0, that means the spot was removed due to contamination - #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift - #otherwise calculate shift at no drug conc - if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){ - X_stats_interaction$Raw_Shift_L <- 0 - X_stats_interaction$Raw_Shift_K <- 0 - X_stats_interaction$Raw_Shift_r <- 0 - X_stats_interaction$Raw_Shift_AUC <- 0 - X_stats_interaction$Z_Shift_L <- 0 - X_stats_interaction$Z_Shift_K <- 0 - X_stats_interaction$Z_Shift_r <- 0 - X_stats_interaction$Z_Shift_AUC <- 0 - }else{ - X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L - X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K - X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r - X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC - X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1] - X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1] - X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1] - X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1] - } - - - #get WT vals - X_stats_interaction$WT_l <- X_stats_BY_L$mean - X_stats_interaction$WT_K <- X_stats_BY_K$mean - X_stats_interaction$WT_r <- X_stats_BY_r$mean - X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean - - #Get WT SD - X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd - X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd - X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd - X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd - - - #only get scores if there's growth at no drug - if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for no growth values in Z score calculation - if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #get linear model - gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction) - - #get interaction score calculated by linear model and R-squared value for the fit - gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] - r_squared_l <- summary(gene_lm_L)$r.squared - gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] - r_squared_K <- summary(gene_lm_K)$r.squared - gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] - r_squared_r <- summary(gene_lm_r)$r.squared - gene_interaction_AUC <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_AUC$coefficients[1] - r_squared_AUC <- summary(gene_lm_AUC)$r.squared - - #Get total of non removed values - Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1 - - #report the scores - InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1]) - InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1]) - - InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_L - InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_L-lm_mean_L)/lm_sd_L - InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE) - InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc) - - - InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_K - InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_K-lm_mean_K)/lm_sd_K - InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE) - InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc) - - InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_r - InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_r-lm_mean_r)/lm_sd_r - InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE) - InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1) - - InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_AUC - InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_AUC-lm_mean_AUC)/lm_sd_AUC - InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE) - InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1) - } - - if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for missing values in Z score calculatiom - if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #NA values for the next part since there's an NA or 0 at the no drug. - gene_lm_L <- NA - gene_lm_K <- NA - gene_lm_r <- NA - - gene_interaction_L <- NA - r_squared_l <- NA - gene_interaction_K <- NA - r_squared_K <- NA - gene_interaction_r <- NA - r_squared_r <- NA - - X_stats_interaction$Raw_Shift_L <- NA - X_stats_interaction$Raw_Shift_K <- NA - X_stats_interaction$Raw_Shift_r <- NA - X_stats_interaction$Raw_Shift_AUC <- NA - - X_stats_interaction$Z_Shift_L <- NA - X_stats_interaction$Z_Shift_K <- NA - X_stats_interaction$Z_Shift_r <- NA - X_stats_interaction$Z_Shift_AUC <- NA - - InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1]) - InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1]) - - InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- NA - - InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <-NA - InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- NA - - InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- NA - - InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - } - - if(i == 1){ - X_stats_interaction_ALL <- X_stats_interaction - } - if(i > 1){ - X_stats_interaction_ALL <- rbind(X_stats_interaction_ALL,X_stats_interaction) - } - - InteractionScores$NG[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE) - InteractionScores$DB[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE) - InteractionScores$SM[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE) - - #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int) - - - } - print("Pass Int Calculation loop") - InteractionScores <- InteractionScores[order(InteractionScores$Z_lm_L,decreasing=TRUE),] - InteractionScores <- InteractionScores[order(InteractionScores$NG,decreasing=TRUE),] - df_order_by_OrfRep <- unique(InteractionScores$OrfRep) - #X_stats_interaction_ALL <- X_stats_interaction_ALL[order(X_stats_interaction_ALL$NG,decreasing=TRUE),] - write.csv(InteractionScores,paste(outputpath,"ZScores_Interaction.csv",sep=""),row.names=FALSE) - - InteractionScores_deletion_enhancers_L <- InteractionScores[InteractionScores$Avg_Zscore_L >= 2,] - InteractionScores_deletion_enhancers_K <- InteractionScores[InteractionScores$Avg_Zscore_K <= -2,] - InteractionScores_deletion_suppressors_L <- InteractionScores[InteractionScores$Avg_Zscore_L <= -2,] - InteractionScores_deletion_suppressors_K <- InteractionScores[InteractionScores$Avg_Zscore_K >= 2,] - InteractionScores_deletion_enhancers_and_Suppressors_L <- InteractionScores[InteractionScores$Avg_Zscore_L >= 2 | InteractionScores$Avg_Zscore_L <= -2,] - InteractionScores_deletion_enhancers_and_Suppressors_K <- InteractionScores[InteractionScores$Avg_Zscore_K >= 2 | InteractionScores$Avg_Zscore_K <= -2,] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- InteractionScores[InteractionScores$Z_lm_L >= 2 & InteractionScores$Avg_Zscore_L <= -2,] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- InteractionScores[InteractionScores$Z_lm_L <= -2 & InteractionScores$Avg_Zscore_L >= 2,] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- InteractionScores[InteractionScores$Z_lm_K <= -2 & InteractionScores$Avg_Zscore_K >= 2,] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- InteractionScores[InteractionScores$Z_lm_K >= 2 & InteractionScores$Avg_Zscore_K <= -2,] - - InteractionScores_deletion_enhancers_L <- InteractionScores_deletion_enhancers_L[!is.na(InteractionScores_deletion_enhancers_L$OrfRep),] - InteractionScores_deletion_enhancers_K <- InteractionScores_deletion_enhancers_K[!is.na(InteractionScores_deletion_enhancers_K$OrfRep),] - InteractionScores_deletion_suppressors_L <- InteractionScores_deletion_suppressors_L[!is.na(InteractionScores_deletion_suppressors_L$OrfRep),] - InteractionScores_deletion_suppressors_K <- InteractionScores_deletion_suppressors_K[!is.na(InteractionScores_deletion_suppressors_K$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_L <- InteractionScores_deletion_enhancers_and_Suppressors_L[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_K <- InteractionScores_deletion_enhancers_and_Suppressors_K[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K$OrfRep),] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L[!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L$OrfRep),] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L[!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L$OrfRep),] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K[!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K$OrfRep),] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K[!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K$OrfRep),] - - write.csv(InteractionScores_deletion_enhancers_L,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_K,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_L,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_K,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L,paste(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L,paste(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K,paste(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K,paste(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv",sep=""),row.names=FALSE) - - #get enhancers and suppressors for linear regression - InteractionScores_deletion_enhancers_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2,] - InteractionScores_deletion_enhancers_K_lm <- InteractionScores[InteractionScores$Z_lm_K <= -2,] - InteractionScores_deletion_suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L <= -2,] - InteractionScores_deletion_suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2,] - InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2 | InteractionScores$Z_lm_L <= -2,] - InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2 | InteractionScores$Z_lm_K <= -2,] - - InteractionScores_deletion_enhancers_L_lm <- InteractionScores_deletion_enhancers_L_lm[!is.na(InteractionScores_deletion_enhancers_L_lm$OrfRep),] - InteractionScores_deletion_enhancers_K_lm <- InteractionScores_deletion_enhancers_K_lm[!is.na(InteractionScores_deletion_enhancers_K_lm$OrfRep),] - InteractionScores_deletion_suppressors_L_lm <- InteractionScores_deletion_suppressors_L_lm[!is.na(InteractionScores_deletion_suppressors_L_lm$OrfRep),] - InteractionScores_deletion_suppressors_K_lm <- InteractionScores_deletion_suppressors_K_lm[!is.na(InteractionScores_deletion_suppressors_K_lm$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores_deletion_enhancers_and_Suppressors_L_lm[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L_lm$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores_deletion_enhancers_and_Suppressors_K_lm[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K_lm$OrfRep),] - - write.csv(InteractionScores_deletion_enhancers_L_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_L_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_K_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_K_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_L_lm,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_L_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_K_lm,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_K_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv",sep=""),row.names=FALSE) - - - write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) - print('ln 1570 write StudyInfo.csv after ') - #write.table(Labels,file=paste("../Code/StudyInfo.txt"),sep="\t",row.names = FALSE) - - for(i in 1:num_genes){ - Gene_Sel <- unique(InteractionScores$OrfRep)[i] - X_ZCalculations <- X_stats_interaction_ALL[X_stats_interaction_ALL$OrfRep == Gene_Sel,] - X_Int_Scores <- InteractionScores[InteractionScores$OrfRep == Gene_Sel,] - - p_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_L,2))) + - annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_L,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) + - annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_K,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-0.65,0.65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) + - annotate("text",x=1,y=0.25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_r,2))) + - annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) + - theme_Publication() - - p_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-6500,6500)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) + - annotate("text",x=1,y=2500,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_AUC,2))) + - annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) + - theme_Publication() - - - - if(i == 1){ - X_stats_interaction_ALL_final <- X_ZCalculations - } - if(i > 1){ - X_stats_interaction_ALL_final <- rbind(X_stats_interaction_ALL_final,X_ZCalculations) - } - } - print("Pass Int ggplot loop") - write.csv(X_stats_interaction_ALL_final,paste(outputpath,"ZScore_Calculations.csv",sep=""),row.names = FALSE) - - - - - - - Blank <- ggplot(X2_RF) + geom_blank() - - pdf(paste(outputpath,"InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE) - - X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise, - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_AUC = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - NG = sum(NG,na.rm=TRUE), - DB = sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - L_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,l)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + - annotate("text",x=-0.25,y=10,label="NG") + - annotate("text",x=-0.25,y=5,label="DB") + - annotate("text",x=-0.25,y=0,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=0,label=X_stats_X2_RF$SM) + - theme_Publication() - - K_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,K)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(-20,160)) + - annotate("text",x=-0.25,y=-5,label="NG") + - annotate("text",x=-0.25,y=-12.5,label="DB") + - annotate("text",x=-0.25,y=-20,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-5,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-12.5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-20,label=X_stats_X2_RF$SM) + - theme_Publication() - - R_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,r)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - annotate("text",x=-0.25,y=.9,label="NG") + - annotate("text",x=-0.25,y=.8,label="DB") + - annotate("text",x=-0.25,y=.7,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.9,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.8,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.7,label=X_stats_X2_RF$SM) + - theme_Publication() - - AUC_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,AUC)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + - annotate("text",x=-0.25,y=11000,label="NG") + - annotate("text",x=-0.25,y=10000,label="DB") + - annotate("text",x=-0.25,y=9000,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=11000,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10000,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=9000,label=X_stats_X2_RF$SM) + - theme_Publication() - - - L_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + - theme_Publication() - - K_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + - theme_Publication() - - r_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - theme_Publication() - - AUC_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + - theme_Publication() - - - grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2) - grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2) - - - - #plot the references - #grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4) - #grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2) - - #loop for grid arrange 4x3 - j <- rep(1,((num_genes)/3)-1) - for(n in 1:length(j)){ - j[n+1] <- n*3 + 1 - } - #loop for printing each plot - num <- 0 - for(m in 1:(round((num_genes)/3)-1)){ - num <- j[m] - grid.arrange(p_l[[num]],p_K[[num]],p_r[[num]],p_AUC[[num]],p_l[[num+1]],p_K[[num+1]],p_r[[num+1]],p_AUC[[num+1]],p_l[[num+2]],p_K[[num+2]],p_r[[num+2]],p_AUC[[num+2]],ncol=4,nrow=3) - #grid.arrange(p_l[[364]],p_K[[364]],p_r[[364]],p_l[[365]],p_K[[365]],p_r[[365]],p_l[[366]],p_K[[366]],p_r[[366]],ncol=3,nrow=3) - - - #p1[[num+3]],p_K[[num+3]],p_r[[num+3]],p1[[num+4]],p_K[[num+4]],p_r[[num+4]] - } - if(num_genes != (num+2)){ - total_num = num_genes - (num+2) - if(total_num == 5){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_AUC[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],p_AUC[[num+7]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - if(total_num == 4){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_AUC[[num+6]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - - if(total_num == 3){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3) - #grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 2){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 1){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - } - dev.off() - - - - pdf(paste(outputpath,"RF_InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE) - - X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise, - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_AUC = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - NG = sum(NG,na.rm=TRUE), - DB = sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - L_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,l)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + - annotate("text",x=-0.25,y=10,label="NG") + - annotate("text",x=-0.25,y=5,label="DB") + - annotate("text",x=-0.25,y=0,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=0,label=X_stats_X2_RF$SM) + - theme_Publication() - - K_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,K)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(-20,160)) + - annotate("text",x=-0.25,y=-5,label="NG") + - annotate("text",x=-0.25,y=-12.5,label="DB") + - annotate("text",x=-0.25,y=-20,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-5,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-12.5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-20,label=X_stats_X2_RF$SM) + - theme_Publication() - - R_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,r)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - annotate("text",x=-0.25,y=.9,label="NG") + - annotate("text",x=-0.25,y=.8,label="DB") + - annotate("text",x=-0.25,y=.7,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.9,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.8,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.7,label=X_stats_X2_RF$SM) + - theme_Publication() - - AUC_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,AUC)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + - annotate("text",x=-0.25,y=11000,label="NG") + - annotate("text",x=-0.25,y=10000,label="DB") + - annotate("text",x=-0.25,y=9000,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=11000,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10000,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=9000,label=X_stats_X2_RF$SM) + - theme_Publication() - - - L_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + - theme_Publication() - - K_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + - theme_Publication() - - r_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - theme_Publication() - - AUC_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(12000,0)) + - theme_Publication() - - - grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2) - grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2) - - - - #plot the references - #grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4) - #grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2) - - #loop for grid arrange 4x3 - j <- rep(1,((num_genes_RF)/3)-1) - for(n in 1:length(j)){ - j[n+1] <- n*3 + 1 - } - #loop for printing each plot - num <- 0 - for(m in 1:(round((num_genes_RF)/3)-1)){ - num <- j[m] - grid.arrange(p_rf_l[[num]],p_rf_K[[num]],p_rf_r[[num]],p_rf_AUC[[num]],p_rf_l[[num+1]],p_rf_K[[num+1]],p_rf_r[[num+1]],p_rf_AUC[[num+1]],p_rf_l[[num+2]],p_rf_K[[num+2]],p_rf_r[[num+2]],p_rf_AUC[[num+2]],ncol=4,nrow=3) - #grid.arrange(p_rf_l[[364]],p_rf_K[[364]],p_rf_r[[364]],p_rf_l[[365]],p_rf_K[[365]],p_rf_r[[365]],p_rf_l[[366]],p_rf_K[[366]],p_rf_r[[366]],ncol=3,nrow=3) - - - #p1[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p1[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]] - } - if(num_genes_RF != (num+2)){ - total_num = num_genes_RF - (num+2) - if(total_num == 5){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_AUC[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],p_rf_AUC[[num+7]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - if(total_num == 4){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_AUC[[num+6]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - - if(total_num == 3){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3) - #grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 2){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 1){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - } - dev.off() - - #print rank plots for L and K gene interactions - - - InteractionScores_AdjustMissing <- InteractionScores - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_L),]$Avg_Zscore_L <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_K),]$Avg_Zscore_K <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_r),]$Avg_Zscore_r <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_AUC),]$Avg_Zscore_AUC <- 0.001 - - InteractionScores_AdjustMissing$L_Rank <- NA - InteractionScores_AdjustMissing$K_Rank <- NA - InteractionScores_AdjustMissing$r_Rank <- NA - InteractionScores_AdjustMissing$AUC_Rank <- NA - - InteractionScores_AdjustMissing$L_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_L) - InteractionScores_AdjustMissing$K_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_K) - InteractionScores_AdjustMissing$r_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_r) - InteractionScores_AdjustMissing$AUC_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_AUC) - - # - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_L),]$Z_lm_L <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_K),]$Z_lm_K <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_r),]$Z_lm_r <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_AUC),]$Z_lm_AUC <- 0.001 - - InteractionScores_AdjustMissing$L_Rank_lm <- NA - InteractionScores_AdjustMissing$K_Rank_lm <- NA - InteractionScores_AdjustMissing$r_Rank_lm <- NA - InteractionScores_AdjustMissing$AUC_Rank_lm <- NA - - InteractionScores_AdjustMissing$L_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_L) - InteractionScores_AdjustMissing$K_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_K) - InteractionScores_AdjustMissing$r_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_r) - InteractionScores_AdjustMissing$AUC_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_AUC) - - - - Rank_L_1SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - pdf(paste(outputpath,"RankPlots.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2) - - dev.off() - - - Rank_L_1SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - pdf(paste(outputpath,"RankPlots_lm.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2) - - dev.off() - - - - X_NArm <- InteractionScores[!is.na(InteractionScores$Z_lm_L) | !is.na(InteractionScores$Avg_Zscore_L) ,] - - #find overlaps - X_NArm$Overlap <- "No Effect" - try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Enhancer Both") - try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Suppressor Both") - try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= 2,]$Overlap <- "Deletion Enhancer lm only") - try(X_NArm[X_NArm$Z_lm_L <= 2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Enhancer Avg Zscore only") - try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= -2,]$Overlap <- "Deletion Suppressor lm only") - try(X_NArm[X_NArm$Z_lm_L >= -2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Suppressor Avg Zscore only") - try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Enhancer lm, Deletion Suppressor Avg Z score") - try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Suppressor lm, Deletion Enhancer Avg Z score") - - #get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 - get_lm_L <- lm(X_NArm$Z_lm_L~X_NArm$Avg_Zscore_L) - L_lm <- summary(get_lm_L) - - get_lm_K <- lm(X_NArm$Z_lm_K~X_NArm$Avg_Zscore_K) - K_lm <- summary(get_lm_K) - - get_lm_r <- lm(X_NArm$Z_lm_r~X_NArm$Avg_Zscore_r) - r_lm <- summary(get_lm_r) - - get_lm_AUC <- lm(X_NArm$Z_lm_AUC~X_NArm$Avg_Zscore_AUC) - AUC_lm <- summary(get_lm_AUC) - - pdf(paste(outputpath,"Avg_Zscore_vs_lm_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) - - print(ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm L") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(Avg_Zscore_K,Z_lm_K)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm K") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(K_lm$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(Avg_Zscore_r,Z_lm_r)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm r") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(r_lm$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(Avg_Zscore_AUC,Z_lm_AUC)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm AUC") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(AUC_lm$r.squared,2))) + theme_Publication_legend_right()) - - dev.off() - - - lm_v_Zscore_L <- ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L,ORF=OrfRep,Gene=Gene,NG=NG,SM=SM,DB=DB)) + geom_point(aes(color=Overlap),shape=3) + - geom_smooth(method = "lm",color=1) + ggtitle("Avg Zscore vs lm L") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right() - - pgg <- ggplotly(lm_v_Zscore_L) - #pgg - plotly_path <- paste(getwd(),"/",outputpath,"Avg_Zscore_vs_lm_NA_rm.html",sep="") - saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - X_NArm$L_Rank <- rank(X_NArm$Avg_Zscore_L) - X_NArm$K_Rank <- rank(X_NArm$Avg_Zscore_K) - X_NArm$r_Rank <- rank(X_NArm$Avg_Zscore_r) - X_NArm$AUC_Rank <- rank(X_NArm$Avg_Zscore_AUC) - - X_NArm$L_Rank_lm <- rank(X_NArm$Z_lm_L) - X_NArm$K_Rank_lm <- rank(X_NArm$Z_lm_K) - X_NArm$r_Rank_lm <- rank(X_NArm$Z_lm_r) - X_NArm$AUC_Rank_lm <- rank(X_NArm$Z_lm_AUC) - - #get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 - get_lm_L2 <- lm(X_NArm$L_Rank_lm~X_NArm$L_Rank) - L_lm2 <- summary(get_lm_L2) - - get_lm_K2 <- lm(X_NArm$K_Rank_lm~X_NArm$K_Rank) - K_lm2 <- summary(get_lm_K2) - - get_lm_r2 <- lm(X_NArm$r_Rank_lm~X_NArm$r_Rank) - r_lm2 <- summary(get_lm_r2) - - get_lm_AUC2 <- lm(X_NArm$AUC_Rank_lm~X_NArm$AUC_Rank) - AUC_lm2 <- summary(get_lm_AUC2) - - num_genes_NArm2 <- (dim(X_NArm)[1])/2 - - pdf(paste(outputpath,"Avg_Zscore_vs_lm_ranked_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) - - print(ggplot(X_NArm,aes(L_Rank,L_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank Avg Zscore vs lm L") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(L_lm2$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(K_Rank,K_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank Avg Zscore vs lm K") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(K_lm2$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(r_Rank,r_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank Avg Zscore vs lm r") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(r_lm2$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(AUC_Rank,AUC_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank of Avg Zscore vs lm AUC") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(AUC_lm2$r.squared,2))) + theme_Publication_legend_right()) - - - - dev.off() - - - - Rank_L_1SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - pdf(paste(outputpath,"RankPlots_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2) - - dev.off() - - - Rank_L_1SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - pdf(paste(outputpath,"RankPlots_lm_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2) - - dev.off() - -} - - - -#get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 -get_lm_1 <- lm(X_NArm$Z_lm_K~X_NArm$Z_lm_L) -L_lm_1 <- summary(get_lm_1) - -get_lm_2 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_L) -L_lm_2 <- summary(get_lm_2) - -get_lm_3 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_L) -L_lm_3 <- summary(get_lm_3) - -get_lm_4 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_K) -L_lm_4 <- summary(get_lm_4) - -get_lm_5 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_K) -L_lm_5 <- summary(get_lm_5) - -get_lm_6 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_r) -L_lm_6 <- summary(get_lm_6) - - -pdf(file=paste(outputpath,"Correlation_CPPs.pdf",sep=""),width = 10, height = 7, onefile = TRUE) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_K)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction L vs. Interaction K") + - xlab("z-score L") + ylab("z-score K") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_1$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction L vs. Interaction r") + - xlab("z-score L") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_2$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction L vs. Interaction AUC") + - xlab("z-score L") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_3$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction K vs. Interaction r") + - xlab("z-score K") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_4$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction K vs. Interaction AUC") + - xlab("z-score K") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_5$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_r,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction r vs. Interaction AUC") + - xlab("z-score r") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_6$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -InteractionScores_RF2 <- InteractionScores_RF[!is.na(InteractionScores_RF$Z_lm_L),] -ggplot(X_NArm,aes(Z_lm_L,Z_lm_K)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_K),color="cyan") + - ggtitle("Interaction L vs. Interaction K") + - xlab("z-score L") + ylab("z-score K") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_1$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_r),color="cyan") + - ggtitle("Interaction L vs. Interaction r") + - xlab("z-score L") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_2$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_AUC),color="cyan") + - ggtitle("Interaction L vs. Interaction AUC") + - xlab("z-score L") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_3$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_K,Z_lm_r),color="cyan") + - ggtitle("Interaction K vs. Interaction r") + - xlab("z-score K") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_4$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_K,Z_lm_AUC),color="cyan") + - ggtitle("Interaction K vs. Interaction AUC") + - xlab("z-score K") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_5$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_r,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_r,Z_lm_AUC),color="cyan") + - ggtitle("Interaction r vs. Interaction AUC") + - xlab("z-score r") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_6$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - - - - - -dev.off() - - -#write.csv(Labels,file=paste("../Code/Parameters.csv"),row.names = FALSE) -timestamp() - -#BoneYard*********************************************** -#I'm thinking this parameter needs to be save somewhere "permanent' for the record so outputs can be reproduced. -#take this out of the Arguments. In Matlab I could for future in .mat file. Maybe I could save the SD Args[2] as part of the StudyInfo.txt. -#Corruptable but better than nothing. -#if(is.na(Args[2])){ -# std=3 -#}else { -# std= Arg[2] -#Delta_Background_sdFactor <- 2 #Args[3] -#delBGFactor <- as.numeric(Delta_Background_sdFactor) -#} -