diff --git a/workflow/qhtcp-workflow b/workflow/qhtcp-workflow index b8068300..6cde3ffc 100755 --- a/workflow/qhtcp-workflow +++ b/workflow/qhtcp-workflow @@ -74,7 +74,7 @@ print_help() { script-run-workflow [[OPTION] [VALUE]]... Some options (--project, --include, --exclude) can be passed multiple times or - by using comma deliminated strings (see EXAMPLES below) + by using comma-separated strings (see EXAMPLES below) OPTIONS: --project, -p PROJECT @@ -117,6 +117,7 @@ print_help() { script-run-workflow --module=${ALL_MODULES[0]},${ALL_MODULES[1]} script-run-workflow --project=${PROJECT_PREFIX}_MY_PROJECT,${PROJECT_PREFIX}_MY_OTHER_PROJECT script-run-workflow --project=${PROJECT_PREFIX}_MY_PROJECT,${PROJECT_PREFIX}_MY_OTHER_PROJECT --module=${ALL_MODULES[1]},${ALL_MODULES[2]} --yes --debug + script-run-workflow --project=${PROJECT_PREFIX}_MY_PROJECT --submodule ${ALL_SUBMODULES[2]} \"/path/to/genefile.txt,/path/to/output/dir\" --submodule ${ALL_SUBMODULES[3]} \"/path/to/sgofile\" EOF } @@ -326,7 +327,7 @@ print_header() { # Let user choose project(s) if [[ -z ${PROJECTS[*]} ]]; then num=$((${#projects[@]})) - echo "Enter comma delimited project #'s (from list) to analyze" + echo "Enter a comma-separated list of project numbers to analyze" read -r -p "Or hit Enter to add a new project" response [[ -z $response ]] && ask_pn && PROJECTS+=("${ADD_PROJECTS[@]}") ((YES)) || read -r -p "Hit enter for default ($num): " response @@ -346,7 +347,7 @@ print_header() { done if [[ -z ${MODULES[*]} && -z ${EXCLUDE_MODULES[*]} ]]; then - echo "Enter module #'s to run (by #, comma delimited)" + echo "Enter a comma-separated list of modules to run" ((YES)) || read -r -p "Hit Enter for all (default) or '0' for none: " response if [[ -n $response && $response -ne 0 ]]; then IFS=',' read -ra arr <<< "$response" @@ -359,7 +360,8 @@ print_header() { if [[ -z ${MODULES[*]} && -z ${EXCLUDE_MODULES[*]} && -z ${SUBMODULES[*]} ]]; then while :; do - echo "Enter a submodule followed by its arguments as a case delimited string in quotes" + echo "Enter a submodule followed by its arguments as a comma-separated string" + echo "Quote your string if there are any whitespaces" echo "Example: ${ALL_SUBMODULES[0]} \"arg1,arg2,arg3...\"" ((YES)) || read -r -p "Or hit Enter to continue: " response [[ -z $response ]] && break @@ -520,6 +522,7 @@ init_project() { if ask "You can edit this file in the qhtcp module"; then cat <<-EOF > "$STUDY_INFO_FILE" "ExpNumb","ExpLabel","BackgroundSD","ZscoreJoinSD","AnalysisBy" + EOF fi fi @@ -943,15 +946,10 @@ qhtcp() { module remc -# @section GTF -# @description GTF module for QHTCP -# TODO which components of remc can be parallelized? -# The submodules in remc really like to be run from the REMc dir -# so we pop in and out for now -# NOTE the remc modules could use some love -# * Don't cd within scripts, it's confusing -# * Use arguments to pass configuration variables -# * This allows us to abstract the program away in script-run-workflow and treat it like a module +# @section remc +# @description remc module for QHTCP +# TODO +# * Which components can be parallelized? # @arg $1 string studyInfo file remc() { debug "Running: ${FUNCNAME[0]} $*" @@ -992,14 +990,14 @@ module gtf gtf() { debug "Running: ${FUNCNAME[0]}" gtf_out_dir="${1:-$QHTCP_PROJECT_DIR/out/gtf}" + gene_association_sgd="${2:-"$APPS_DIR/r/gene_association.sgd"}" + gene_ontology_obo="${3:-"$APPS_DIR/r/gene_ontology_edit.obo"}" + orf_list="${4:-"$APPS_DIR/r/ORF_List_Without_DAmPs.txt"}" + process_dir="$gtf_out_dir/process" function_dir="$gtf_out_dir/function" component_dir="$gtf_out_dir/component" - gene_association_sgd="${2:-"$APPS_DIR/perl/gene_association.sgd"}" - gene_ontology_obo="${3:-"$APPS_DIR/perl/gene_ontology_edit.obo"}" - orf_list="${4:-"$APPS_DIR/perl/ORF_List_Without_DAmPs.txt"}" - py_gtf_dcon \ "$process_dir" \ "$gtf_out_dir" @@ -1040,26 +1038,26 @@ module gta # TODO # * # * -# @set GTA_OUT_DIR string The GTA output results dir -# @set all_sgd_terms_csv string The all_SGD_GOTerms_for_QHTCPtk.csv file -# @set sgd_terms_tfile string The go_terms.tab file -# @set sgd_features_file string The gene_association.sgd file -# @set gene_ontology_file string The gene_ontology_edit.obo file -# @set zscores_file string The ZScores_interaction.csv file +# @arg $1 string output directory +# @arg $2 string gene_association.sgd +# @arg $3 string gene_ontology_edit.obo +# @arg $4 string go_terms.tab +# @arg $5 string All_SGD_GOTerms_for_QHTCPtk.csv +# @arg $6 string zscores_interaction.csv gta() { debug "Running: ${FUNCNAME[0]}" - GTA_OUT_DIR="$QHTCP_PROJECT_DIR/gta" - all_sgd_terms_csv="$APPS_DIR/r/All_SGD_GOTerms_for_QHTCPtk.csv" - sgd_terms_tfile="$APPS_DIR/r/go_terms.tab" - sgd_features_file="$APPS_DIR/r/gene_association.sgd" - gene_ontology_file="$APPS_DIR/r/gene_ontology_edit.obo" - zscores_file="zscores/zscores_interaction.csv" - + gta_out_dir="${1:-"$QHTCP_PROJECT_DIR/gta"}" + gene_association_sgd="${2:-"$APPS_DIR/r/gene_association.sgd"}" + gene_ontology_obo="${3:-"$APPS_DIR/r/gene_ontology_edit.obo"}" + sgd_terms_tfile="${4:-"$APPS_DIR/r/go_terms.tab"}" + all_sgd_terms_csv="${5:-"$APPS_DIR/r/All_SGD_GOTerms_for_QHTCPtk.csv"}" + zscores_file="${6:-"$gta_out_dir/zscores/zscores_interaction.csv"}" # TODO This could be wrong, it could be in main results + # Sets STUDIES_NUM and NUM_STUDIES get_studies "$STUDY_INFO_FILE" - [[ -d $GTA_OUT_DIR ]] || mkdir "$GTA_OUT_DIR" + [[ -d $gta_out_dir ]] || mkdir "$gta_out_dir" # Loop over the array and create pairwise arrays for ((i=0; i<${#STUDIES_NUMS[@]}; i++)); do @@ -1088,13 +1086,13 @@ gta() { for s in "${STUDIES_NUMS[@]}"; do zscores_file="$QHTCP_PROJECT_DIR/Exp$s/$zscores_file" if [[ -f $zscores_file ]]; then - mkdir "$GTA_OUT_DIR/Exp$s" + mkdir "$gta_out_dir/Exp$s" r_gta \ "Exp$s" \ "$zscores_file" \ "$sgd_terms_tfile" \ - "$sgd_features_file" \ - "$GTA_OUT_DIR" + "$gene_association_sgd" \ + "$gta_out_dir" fi done @@ -1102,7 +1100,7 @@ gta() { for combo in "${study_combos[@]}"; do # Split on comma and assign to array IFS=',' read -ra studies <<< "$combo" - r_gta_pairwiselk "${studies[0]}" "${studies[1]}" "$STUDY_INFO_FILE" "$GTA_OUT_DIR" + r_gta_pairwiselk "${studies[0]}" "${studies[1]}" "$STUDY_INFO_FILE" "$gta_out_dir" done # All studies @@ -1110,7 +1108,7 @@ gta() { # are required r_gta_heatmaps \ "$STUDY_INFO_FILE" \ - "$gene_ontology_file" \ + "$gene_ontology_obo" \ "$sgd_terms_tfile" \ "$all_sgd_terms_csv" \ "$zscores_file" \ @@ -1121,14 +1119,12 @@ gta() { # @section Submodules -# @description Submodules provide functionality to modules and should be reusable -# A submodule only runs by default if called by a module -# Use a submodule for: -# * Calling external scripts -# * Performing repetitive tasks -# * Generalizing code -# * Functions you do not want to perform by default (submodules should be called modules) -# * Should not call cd or pushd (let module dictate) +# @description Submodules are shell wrappers for workflow components in external languages. +# Submodules: +# * Allow scripts to be called by the main workflow script using input\ +# and output arguments as a translation mechanism. +# * Only run by default if called by a module. +# * Can be called directly with its arguments as a comma-separated string submodule r_gta @@ -1320,6 +1316,10 @@ r_interactions() { submodule r_join_interactions # @description JoinInteractExps3dev.R creates REMcRdy_lm_only.csv and Shift_only.csv +# Output files: +# * REMcRdy_lm_only.csv +# * Shift_only.csv +# * parameters.csv # @arg $1 string The output directory # @arg $2 string The sd value # @arg $3 string The studyInfo file @@ -1552,19 +1552,15 @@ get_studies() { # submodule choose_easy_results_dir # # # @description Chooses an EASY scans directory if the information is undefined -# # TODO: Standardize EASY output, it's hard to understand +# # TODO Standardize EASY output, it's hard to understand # # TODO eventually we could run this on multiple results dirs simultaneously with some refactoring # # @exitcode 0 if successfully choose an EASY results dir # # @set EASY_RESULTS_DIR string The working EASY output directory # choose_easy_results_dir() { # debug "Running: ${FUNCNAME[0]}" - - - # # Always backup existing output # # This would happen if you ran the same experiment twice in one day, for instance # [[ -d $EASY_RESULTS_DIR ]] && backup "$EASY_RESULTS_DIR" - # if [[ ! -d $EASY_RESULTS_DIR ]]; then # debug "mkdir $EASY_RESULTS_DIR" # mkdir "$EASY_RESULTS_DIR" @@ -1572,7 +1568,6 @@ get_studies() { # err "Could not create $EASY_RESULTS_DIR" # return 0 # fi - # # echo "Hit enter to use the default EASY results directory: $default_easy_results_dir" # # if ! (( YES )); then # # read -r -p "Or enter a custom directory name, example: $PROJECT" dirname diff --git a/workflow/templates/exp/Notes b/workflow/templates/exp/Notes deleted file mode 100644 index 1f671bc1..00000000 --- a/workflow/templates/exp/Notes +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright 2024 Bryan C. Roessler -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - diff --git a/workflow/templates/exp/StudyInfo.csv b/workflow/templates/exp/StudyInfo.csv deleted file mode 100644 index 70226305..00000000 --- a/workflow/templates/exp/StudyInfo.csv +++ /dev/null @@ -1,4 +0,0 @@ -ExpNumb,ExpLabel,BackgroundSD,ZscoreJoinSD,AnalysisBy -1,ExpName1,NA,NA,UserInitials -2,ExpName2,NA,NA,UserInitials -3,ExpName3,NA,NA,UserInitials diff --git a/workflow/templates/exp/ZScores/.DS_Store b/workflow/templates/exp/ZScores/.DS_Store deleted file mode 100644 index 6ff7ba4b..00000000 Binary files a/workflow/templates/exp/ZScores/.DS_Store and /dev/null differ diff --git a/workflow/templates/exp/backups/InteractTemplateB4Prompt4SDinput.R b/workflow/templates/exp/backups/InteractTemplateB4Prompt4SDinput.R deleted file mode 100644 index 2eb34680..00000000 --- a/workflow/templates/exp/backups/InteractTemplateB4Prompt4SDinput.R +++ /dev/null @@ -1,2702 +0,0 @@ -#Based on InteractionTemplate.R which is based on Sean Santose's Interaction_V5 script. -#Adapt SS For Structured Data storage but using command line scripts -###Set up the required libraries, call required plot theme elements and set up the command line arguments -library("ggplot2") -library("plyr") -library("extrafont") -library("gridExtra") -library("gplots") -library("RColorBrewer") -library("stringr") -#library("gdata") -library(plotly) -library(htmlwidgets) - -Args <- commandArgs(TRUE) -input_file <- Args[1] #"!!Results_17_0827_yor1null-rpl12anull misLabeledAsFrom MI 17_0919_yor1-curated.txt" #Args[1] #Arg 1 #"!!ResultsStd_JS 19_1224_HLEG_P53.txt" is the !!results ... .txt - -#Path to Output Directory -W=getwd() #R is F'd up, Can't use, Any legitamate platfold could build out dirs from this -outDir <- "ZScores/" #"Args[2] #paste0(W,"/ZScores/") -subDir <- outDir #Args[2] - -if (file.exists(subDir)){ - outputpath <- subDir -} else { - dir.create(file.path(subDir)) -} - -if (file.exists(paste(subDir,"QC/",sep=""))){ - outputpath_QC <- paste(subDir,"QC/",sep="") -} else { - dir.create(file.path(paste(subDir,"QC/",sep=""))) - outputpath_QC <- paste(subDir,"QC/",sep="") -} -#define the output path (formerly the second argument from Rscript) -outputpath <- outDir - -#Set Args[2] the Background contamination noise filter as a function of standard deviation -#Sean recommends 3 or 5 SD factor. -#Capture Exp_ number,use it to Save Args[2]{std}to Labels field and then Write to Labels to studyInfo.txt for future reference -Labels <- read.csv(file= "../Code/StudyInfo.csv",stringsAsFactors = FALSE) #,sep= ",") -print("Be sure to include Argument 2 the Bacground noise filter standard deviation i.e., 3 or 5 per Sean") -std= as.numeric(Args[2]) -expNumber<- as.numeric(sub("^.*?(\\d+)$", "\\1", getwd())) -Labels[expNumber,3]= as.numeric(std) -Delta_Background_sdFactor <- std -delBGFactor <- as.numeric(Delta_Background_sdFactor) -#Write Background SD value to studyInfo.txt file -#write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) -write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) -print('ln 50 write StudyInfo.csv ') -#write.table(Labels,file=paste(outputpath,"StudyInfo.txt"),sep = "\t",row.names = FALSE) - -#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -#++++++BEGIN USER DATA SELECTION SECTION+++++++++++++++++++++++++++++++++++++++++++++++++ - - -#read in the data -X <- read.delim(input_file,skip=2,as.is=T,row.names=1,strip.white=TRUE) -X <- X[!(X[[1]]%in%c("","Scan")),] -#X <- X[!(X[[1]]%in%c(61:76)),] #Remove dAmp plates which are Scans 61 thru 76 - -#X <- X[which(X$Specifics == "WT"),] - -X_length <- length(X[1,]) -X_end <- length(X[1,]) - 2 -X <- X[,c(1:46,X_end:X_length)] - - -#use numeric data to perform operations -X$Col <- as.numeric(X$Col) -X$Row <- as.numeric(X$Row) -X$l <- as.numeric(X$l) -X$K <- as.numeric(X$K) -X$r <- as.numeric(X$r) -X$Scan <- as.numeric(X$Scan) -X$AUC <- as.numeric(X$AUC) -X$LstBackgrd <- as.numeric(X$LstBackgrd) -X$X1stBackgrd <- as.numeric(X$X1stBackgrd) - -#set the OrfRep to YDL227C for the ref data -X[X$ORF == "YDL227C",]$OrfRep <- "YDL227C" -#Sean removes the Doxycyclin at 0.0ug.mL so that only the Oligomycin series with Doxycyclin of 0.12ug/mL are used. -#That is the first DM plates are removed from the data set with the following. -X <- X[X$Conc1 != "0ug/mL",] #This occurs only for Exp1 and Exp2 and so doesn't have any effect on Exp3&4 - - -#Mert placed the"bad_spot" text in the ORF col. for particular spots in the RF1 and RF2 plates. -#This code removes those spots from the data set used for the interaction analysis. Dr.Hartman feels that these donot effect Zscores significantly and so "non-currated" files were used. -#try(X <- X[X$ORF != "bad_spot",]) -#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++= - -#get total number of drug concentrations -Total_Conc_Nums <- length(unique(X$Conc)) - -#function to ID numbers in string with characters+numbers (ie to get numeric drug conc) -numextract <- function(string){ - str_extract(string, "\\-*\\d+\\.*\\d*") -} - -#generate a new column with the numeric drug concs -X$Conc_Num <- as.numeric(numextract(X$Conc)) -#Generate new column with the numeric drug concs as factors starting at 0 for the graphing later -X$Conc_Num_Factor <- as.numeric(as.factor(X$Conc_Num)) - 1 - -#Get the max factor for concentration -MAX_CONC <- max(X$Conc_Num_Factor) -#if treating numbers not as factors uncomment next line and comment out previous line -#MAX_CONC <- max(X$Conc_Num) - -#remove wells with problems for making graphs and to not include in summary statistics -X <- X[X$Gene != "BLANK",] -X <- X[X$Gene != "Blank",] -X <- X[X$ORF != "Blank",] -X <- X[X$Gene != "blank",] -#X <- X[X$Gene != "HO",] -Xbu= X -#Inserted to use SGDgenelist to update orfs and replace empty geneName cells with ORF name (adapted from Sean's Merge script). This is to 'fix' the naming for everything that follows (REMc, Heatmaps ... et.al) rather than do it piece meal later -#Sean's Match Script( which was adapted here) was fixed 2022_0608 so as not to write over the RF1&RF2 geneNames which caused a variance with his code results -#in the Z_lm_L,K,r&AUC output values. Values correlated well but were off by a multiplier factor. -SGDgeneList= "../Code/SGD_features.tab" -genes = data.frame(read.delim(file=SGDgeneList,quote="",header=FALSE,colClasses = c(rep("NULL",3), rep("character", 2), rep("NULL", 11)))) -for(i in 1:length(X[,14])){ - ii= as.numeric(i) - line_num = match(X[ii,14],genes[,1],nomatch=1) - OrfRepColNum= as.numeric(match('OrfRep',names(X))) - if(X[ii,OrfRepColNum]!= "YDL227C"){ - X[ii,15] = genes[line_num,2] - } - if((X[ii,15] == "")||(X[ii,15] == "OCT1")){ - X[ii,15] = X[ii,OrfRepColNum] - } -} -Xblankreplace= X -#X= Xbu #for restore testing restore X if geneName 'Match' routine needs changing - -#Remove dAmPs ******************************* -DAmPs_List <- "../Code/22_0602_Remy_DAmPsList.txt" -Damps <- read.delim(DAmPs_List,header=F) - -X <- X[!(X$ORF %in% Damps$V1),] #fix this to Damps[,1] -XafterDampsRM=X #Backup for debugging especially when Rstudio goes crazy out of control -# *********** - - -#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -#++++++END USER DATA SELECTION+++++++++++++++++++++++++++++++++++++++++++++++++ -#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -print("ln137 End of User Section including blank gene writeOver") -#++++Begin Graphics Boiler Plate Section+++++++++++++++++++++++++++++++++++++++ -#theme elements for plots -theme_Publication <- function(base_size=14, base_family="sans") { - library(grid) - library(ggthemes) - (theme_foundation(base_size=base_size, base_family=base_family) - + theme(plot.title = element_text(face = "bold", - size = rel(1.2), hjust = 0.5), - text = element_text(), - panel.background = element_rect(colour = NA), - plot.background = element_rect(colour = NA), - panel.border = element_rect(colour = NA), - axis.title = element_text(face = "bold",size = rel(1)), - axis.title.y = element_text(angle=90,vjust =2), - axis.title.x = element_text(vjust = -0.2), - axis.text = element_text(), - axis.line = element_line(colour="black"), - axis.ticks = element_line(), - panel.grid.major = element_line(colour="#f0f0f0"), - panel.grid.minor = element_blank(), - legend.key = element_rect(colour = NA), - legend.position = "bottom", - legend.direction = "horizontal", - legend.key.size= unit(0.2, "cm"), - legend.spacing = unit(0, "cm"), - legend.title = element_text(face="italic"), - plot.margin=unit(c(10,5,5,5),"mm"), - strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"), - strip.text = element_text(face="bold") - )) - -} - -scale_fill_Publication <- function(...){ - library(scales) - discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - -} - -scale_colour_Publication <- function(...){ - discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - -} - - -theme_Publication_legend_right <- function(base_size=14, base_family="sans") { - (theme_foundation(base_size=base_size, base_family=base_family) - + theme(plot.title = element_text(face = "bold", - size = rel(1.2), hjust = 0.5), - text = element_text(), - panel.background = element_rect(colour = NA), - plot.background = element_rect(colour = NA), - panel.border = element_rect(colour = NA), - axis.title = element_text(face = "bold",size = rel(1)), - axis.title.y = element_text(angle=90,vjust =2), - axis.title.x = element_text(vjust = -0.2), - axis.text = element_text(), - axis.line = element_line(colour="black"), - axis.ticks = element_line(), - panel.grid.major = element_line(colour="#f0f0f0"), - panel.grid.minor = element_blank(), - legend.key = element_rect(colour = NA), - legend.position = "right", - legend.direction = "vertical", - legend.key.size= unit(0.5, "cm"), - legend.spacing = unit(0, "cm"), - legend.title = element_text(face="italic"), - plot.margin=unit(c(10,5,5,5),"mm"), - strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"), - strip.text = element_text(face="bold") - )) - -} - -scale_fill_Publication <- function(...){ - discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - -} - -scale_colour_Publication <- function(...){ - discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - -} - - -#print timestamp for initial time the code starts -timestamp() -#+++++BEGIN QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++ -###Part 2 - Quality control -#print quality control graphs for each dataset before removing data due to contamination -#and before adjusting missing data to max theoretical values - -#plate analysis plot -#plate analysis is a quality check to identify plate effects containing anomalies - -Plate_Analysis_L <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication() - -Plate_Analysis_K <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication() - -Plate_Analysis_r <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_Publication() - -Plate_Analysis_AUC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication() - - - -Plate_Analysis_L_Box <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication() - -Plate_Analysis_K_Box <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication() - -Plate_Analysis_r_Box <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_Publication() - -Plate_Analysis_AUC_Box <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication() - -#quality control - values with a high delta background likely have heavy contamination -#check the frequency of these values -#report the L and K values of these spots -#report the number to be removed based on the Delta_Background_Tolerance -X$Delta_Backgrd <- X$LstBackgrd - X$X1stBackgrd - - -#raw l vs K before QC -Raw_l_vs_K_beforeQC <- ggplot(X,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + - ggtitle("Raw L vs K before QC") + - theme_Publication_legend_right() -pdf(paste(outputpath_QC,"Raw_L_vs_K_beforeQC.pdf",sep=""),width = 12,height = 8) -Raw_l_vs_K_beforeQC -dev.off() -pgg <- ggplotly(Raw_l_vs_K_beforeQC) -#pgg -plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_beforeQC.html",sep="") -saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - -#set delta background tolerance based on 3 sds from the mean delta background -Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(delBGFactor*sd(X$Delta_Backgrd)) -#Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(3*sd(X$Delta_Backgrd)) -print(paste("Delta_Background_Tolerance is",Delta_Background_Tolerance,sep=" ")) - -Plate_Analysis_Delta_Backgrd <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2,position="jitter") + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_Box <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication() - - - -X_Delta_Backgrd_above_Tolerance <- X[X$Delta_Backgrd >= Delta_Background_Tolerance,] - -X_Delta_Backgrd_above_Tolerance_K_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$K,na.rm = TRUE))/2 -X_Delta_Backgrd_above_Tolerance_L_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$l,na.rm = TRUE))/2 -X_Delta_Backgrd_above_Tolerance_toRemove <- dim(X_Delta_Backgrd_above_Tolerance)[1] - -X_Delta_Backgrd_above_Tolerance_L_vs_K <- ggplot(X_Delta_Backgrd_above_Tolerance,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + - ggtitle(paste("Raw L vs K for strains above delta background threshold of",Delta_Background_Tolerance,"or above")) + - annotate("text",x=X_Delta_Backgrd_above_Tolerance_L_halfmedian,y=X_Delta_Backgrd_above_Tolerance_K_halfmedian, - label = paste("Strains above delta background tolerance = ",X_Delta_Backgrd_above_Tolerance_toRemove)) + - theme_Publication_legend_right() -pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.pdf",sep=""),width = 12,height = 8) -X_Delta_Backgrd_above_Tolerance_L_vs_K -dev.off() -pgg <- ggplotly(X_Delta_Backgrd_above_Tolerance_L_vs_K) -#pgg -plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.html",sep="") -saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - -#frequency plot for all data vs. the delta_background -DeltaBackground_Frequency_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_density() + - ggtitle("Density plot for Delta Background by Conc All Data") + theme_Publication_legend_right() - -#bar plot for all data vs. the delta_background -DeltaBackground_Bar_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_bar() + - ggtitle("Bar plot for Delta Background by Conc All Data") + theme_Publication_legend_right() - -pdf(file = paste(outputpath_QC,"Frequency_Delta_Background.pdf",sep=""),width = 12,height = 8) -print(DeltaBackground_Frequency_Plot) -print(DeltaBackground_Bar_Plot) -dev.off() - - -#Need to identify missing data, and differentiate between this data and removed data so the removed data can get set to NA and the missing data can get set to max theoretical values -#1 for missing data, 0 for non missing data -#Use "NG" for NoGrowth rather than "missing" -X$NG <- 0 -try(X[X$l == 0 & !is.na(X$l),]$NG <- 1) - -#1 for removed data, 0 non removed data -#Use DB to identify number of genes removed due to the DeltaBackground Threshold rather than "Removed" -X$DB <- 0 -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$DB <- 1) - -#replace the CPPs for l, r, AUC and K (must be last!) for removed data -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$l <- NA) -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$r <- NA) -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$AUC <- NA) -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$K <- NA) - - -Plate_Analysis_L_afterQC <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_afterQC <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_afterQC <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_afterQC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_afterQC <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - - -Plate_Analysis_L_Box_afterQC <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_Box_afterQC <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_Box_afterQC <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_Box_afterQC <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_Box_afterQC <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis.pdf",sep=""),width = 14,height=9) -Plate_Analysis_L -Plate_Analysis_L_afterQC -Plate_Analysis_K -Plate_Analysis_K_afterQC -Plate_Analysis_r -Plate_Analysis_r_afterQC -Plate_Analysis_AUC -Plate_Analysis_AUC_afterQC -Plate_Analysis_Delta_Backgrd -Plate_Analysis_Delta_Backgrd_afterQC -dev.off() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis_Boxplots.pdf",sep=""),width = 18,height=9) -Plate_Analysis_L_Box -Plate_Analysis_L_Box_afterQC -Plate_Analysis_K_Box -Plate_Analysis_K_Box_afterQC -Plate_Analysis_r_Box -Plate_Analysis_r_Box_afterQC -Plate_Analysis_AUC_Box -Plate_Analysis_AUC_Box_afterQC -Plate_Analysis_Delta_Backgrd_Box -Plate_Analysis_Delta_Backgrd_Box_afterQC -dev.off() - -#remove the zero values and print plate analysis -X_noZero <- X[which(X$l > 0),] -Plate_Analysis_L_afterQC_Z <- ggplot(X_noZero,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_afterQC_Z <- ggplot(X_noZero,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_afterQC_Z <- ggplot(X_noZero,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_afterQC_Z <- ggplot(X_noZero,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_afterQC_Z <- ggplot(X_noZero,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - - -Plate_Analysis_L_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros.pdf",sep=""),width = 14,height=9) -Plate_Analysis_L_afterQC_Z -Plate_Analysis_K_afterQC_Z -Plate_Analysis_r_afterQC_Z -Plate_Analysis_AUC_afterQC_Z -Plate_Analysis_Delta_Backgrd_afterQC_Z -dev.off() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros_Boxplots.pdf",sep=""),width = 18,height=9) -Plate_Analysis_L_Box_afterQC_Z -Plate_Analysis_K_Box_afterQC_Z -Plate_Analysis_r_Box_afterQC_Z -Plate_Analysis_AUC_Box_afterQC_Z -Plate_Analysis_Delta_Backgrd_Box_afterQC_Z -dev.off() - -#remove dataset with zeros removed -rm(X_noZero) - - -#X_test_missing_and_removed <- X[X$Removed == 1,] - -#calculate summary statistics for all strains, including both background and the deletions -X_stats_ALL <- ddply(X, c("Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_AUC = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - se_AUC = sd_AUC / sqrt(N-1) -) -#print(X_stats_ALL_L) - -write.csv(X_stats_ALL,file=paste(outputpath,"SummaryStats_ALLSTRAINS.csv"),row.names = FALSE) -#+++++END QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -##### Part 3 - Generate summary statistics and calculate the max theoretical L value -##### Calculate the Z score at each drug conc for each deletion strain - - -#get the background strains - can be modified to take another argument but for most screens will just be YDL227C -Background_Strains <- c("YDL227C") - -#first part of loop will go through for each background strain -#most cases there will only be one YDL227C -for(s in Background_Strains){ - X_Background <- X[X$OrfRep == s,] - - #if there's missing data for the background strains set these values to NA so the 0 values aren't included in summary statistics - #we may want to consider in some cases giving the max high value to L depending on the data type - if(table(X_Background$l)[1] == 0){ - X_Background[X_Background$l == 0,]$l <- NA - X_Background[X_Background$K == 0,]$K <- NA - X_Background[X_Background$r == 0,]$r <- NA - X_Background[X_Background$AUC == 0,]$AUC <- NA - } - - X_Background <- X_Background[!is.na(X_Background$l),] - - #get summary stats for L, K, R, AUC - X_stats_BY_L <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean = mean(l,na.rm=TRUE), - median = median(l,na.rm=TRUE), - max = max(l,na.rm=TRUE), - min = min(l,na.rm=TRUE), - sd = sd(l,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - print(X_stats_BY_L) - X1_SD <- max(X_stats_BY_L$sd) - - X_stats_BY_K <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(K)), - mean = mean(K,na.rm=TRUE), - median = median(K,na.rm=TRUE), - max = max(K,na.rm=TRUE), - min = min(K,na.rm=TRUE), - sd = sd(K,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - - X1_SD_K <- max(X_stats_BY_K$sd) - - - X_stats_BY_r <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = length(r), - mean = mean(r,na.rm=TRUE), - median = median(r,na.rm=TRUE), - max = max(r,na.rm=TRUE), - min = min(r,na.rm=TRUE), - sd = sd(r,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - - X1_SD_r <- max(X_stats_BY_r$sd) - - X_stats_BY_AUC <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = length(AUC), - mean = mean(AUC,na.rm=TRUE), - median = median(AUC,na.rm=TRUE), - max = max(AUC,na.rm=TRUE), - min = min(AUC,na.rm=TRUE), - sd = sd(AUC,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - - X1_SD_AUC <- max(X_stats_BY_AUC$sd) - - X_stats_BY <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_L = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - se_AUC = sd_AUC / sqrt(N-1) - ) - - write.csv(X_stats_BY,file=paste(outputpath,"SummaryStats_BackgroundStrains.csv"),row.names=FALSE) - - #calculate the max theoretical L values - #only look for max values when K is within 2SD of the ref strain - for(q in unique(X$Conc_Num_Factor)){ - if(q == 0){ - X_within_2SD_K <- X[X$Conc_Num_Factor == q,] - X_within_2SD_K <- X_within_2SD_K[!is.na(X_within_2SD_K$l),] - X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) ,] - X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - - X_outside_2SD_K <- X[X$Conc_Num_Factor == q,] - X_outside_2SD_K <- X_outside_2SD_K[!is.na(X_outside_2SD_K$l),] - #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - #X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - - } - if(q > 0){ - X_within_2SD_K_temp <- X[X$Conc_Num_Factor == q,] - X_within_2SD_K_temp <- X_within_2SD_K_temp[!is.na(X_within_2SD_K_temp$l),] - X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])),] - X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])),] - X_within_2SD_K <- rbind(X_within_2SD_K,X_within_2SD_K_temp) - - X_outside_2SD_K_temp <- X[X$Conc_Num_Factor == q,] - X_outside_2SD_K_temp <- X_outside_2SD_K_temp[!is.na(X_outside_2SD_K_temp$l),] - #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - #X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - X_outside_2SD_K <- rbind(X_outside_2SD_K,X_outside_2SD_K_temp) - } - } - - X_stats_BY_L_within_2SD_K <- ddply(X_within_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean = mean(l), - median = median(l), - max = max(l,na.rm=TRUE), - min = min(l,na.rm=TRUE), - sd = sd(l), - se = sd / sqrt(N-1), - z_max = (max-mean)/sd - ) - print(X_stats_BY_L_within_2SD_K) - X1_SD_within_2SD_K <- max(X_stats_BY_L_within_2SD_K$sd) - write.csv(X_stats_BY_L_within_2SD_K,file=paste(outputpath_QC,"Max_Observed_L_Vals_for_spots_within_2SD_K.csv",sep=""),row.names=FALSE) - - X_stats_BY_L_outside_2SD_K <- ddply(X_outside_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean = mean(l), - median = median(l), - max = max(l,na.rm=TRUE), - min = min(l,na.rm=TRUE), - sd = sd(l), - se = sd / sqrt(N-1) - ) - print(X_stats_BY_L_outside_2SD_K) - X1_SD_outside_2SD_K <- max(X_stats_BY_L_outside_2SD_K$sd) - - #X1_SD_outside_2SD_K <- X[X$l %in% X1_SD_within_2SD_K$l,] - Outside_2SD_K_L_vs_K <- ggplot(X_outside_2SD_K,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + - ggtitle("Raw L vs K for strains falling outside 2SD of the K mean at each conc") + theme_Publication_legend_right() - pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8) - print(Outside_2SD_K_L_vs_K) - dev.off() - pgg <- ggplotly(Outside_2SD_K_L_vs_K) - plotly_path <- paste(getwd(),"/",outputpath_QC,"RawL_vs_K_for_strains_outside_2SD_K.html",sep="") - saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - - Outside_2SD_K_delta_background_vs_K <- ggplot(X_outside_2SD_K,aes(Delta_Backgrd,K,color=as.factor(Conc_Num))) + geom_point(aes(l=l,ORF=ORF,Gene=Gene),shape=3,position="jitter") + - ggtitle("DeltaBackground vs K for strains falling outside 2SD of the K mean at each conc") + theme_Publication_legend_right() - pdf(paste(outputpath_QC,"DeltaBackground_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8) - Outside_2SD_K_delta_background_vs_K - dev.off() - pgg <- ggplotly(Outside_2SD_K_delta_background_vs_K) - #pgg - plotly_path <- paste(getwd(),"/",outputpath_QC,"DeltaBackground_vs_K_for_strains_outside_2SD_K.html",sep="") - saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - - #get the background strain mean values at the no drug conc to calculate shift - Background_L <- X_stats_BY_L$mean[1] - Background_K <- X_stats_BY_K$mean[1] - Background_r <- X_stats_BY_r$mean[1] - Background_AUC <- X_stats_BY_AUC$mean[1] - - #create empty plots for plotting element - p_l <- ggplot() - p_K <- ggplot() - p_r <- ggplot() - p_AUC <- ggplot() - - p_rf_l <- ggplot() - p_rf_K <- ggplot() - p_rf_r <- ggplot() - p_rf_AUC <- ggplot() - - #get only the deletion strains - X2 <- X - X2 <- X2[X2$OrfRep != "YDL227C",] - - #if set to max theoretical value, add a 1 to SM, if not, leave as 0 - #SM = Set to Max - X2$SM <- 0 - #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min - for(i in 1:length(unique(X2$Conc_Num))){ - Concentration <- unique(X2$Conc_Num)[i] - X2_temp <- X2[X2$Conc_Num == Concentration,] - if(Concentration == 0){ - X2_new <- X2_temp - print(paste("Check loop order, conc =",Concentration,sep=" ")) - } - if(Concentration > 0){ - try(X2_temp[X2_temp$l == 0 & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$SM <- 1) - try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - - #X2_temp[X2_temp$K == 0,]$K <- X_stats_ALL_K$max[i] - #X2_temp[X2_temp$r == 0,]$r <- X_stats_ALL_r$max[i] - #X2_temp[X2_temp$AUC == 0,]$AUC <- X_stats_ALL_AUC$max[i] - print(paste("Check loop order, conc =",Concentration,sep=" ")) - - X2_new <- rbind(X2_new,X2_temp) - - } - } - X2 <- X2_new - - - #get only the RF strains - X2_RF <- X - X2_RF <- X2_RF[X2_RF$OrfRep == "YDL227C",] - #if set to max theoretical value, add a 1 to SM, if not, leave as 0 - #SM = Set to Max - X2_RF$SM <- 0 - #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min - for(i in 1:length(unique(X2_RF$Conc_Num))){ - Concentration <- unique(X2_RF$Conc_Num)[i] - X2_RF_temp <- X2_RF[X2_RF$Conc_Num == Concentration,] - if(Concentration == 0){ - X2_RF_new <- X2_RF_temp - print(paste("Check loop order, conc =",Concentration,sep=" ")) - } - if(Concentration > 0){ - try(X2_RF_temp[X2_RF_temp$l == 0 & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$SM <- 1) - try(X2_RF_temp[X2_RF_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - print(paste("Check loop order, if error, refs have no L values outside theoretical max L, for REFs, conc =",Concentration,sep=" ")) - - X2_RF_new <- rbind(X2_RF_new,X2_RF_temp) - - } - } - X2_RF <- X2_RF_new - - - #######Part 4 Get the RF Z score values - #Change the OrfRep Column to include the RF strain, the Gene name and the Num. so each RF gets its own score - X2_RF$OrfRep <- paste(X2_RF$OrfRep,X2_RF$Gene,X2_RF$Num.,sep="_") - - num_genes_RF <- length(unique(X2_RF$OrfRep)) - print(num_genes_RF) - - - #create the output data.frame containing columns for each RF strain - InteractionScores_RF <- unique(X2_RF["OrfRep"]) - #InteractionScores_RF$Gene <- unique(X2$Gene) - InteractionScores_RF$Gene <- NA - InteractionScores_RF$Raw_Shift_L <- NA - InteractionScores_RF$Z_Shift_L <- NA - InteractionScores_RF$lm_Score_L <- NA - InteractionScores_RF$Z_lm_L <- NA - InteractionScores_RF$R_Squared_L <- NA - InteractionScores_RF$Sum_Z_Score_L <- NA - InteractionScores_RF$Avg_Zscore_L <- NA - InteractionScores_RF$Raw_Shift_K <- NA - InteractionScores_RF$Z_Shift_K <- NA - InteractionScores_RF$lm_Score_K <- NA - InteractionScores_RF$Z_lm_K <- NA - InteractionScores_RF$R_Squared_K <- NA - InteractionScores_RF$Sum_Z_Score_K <- NA - InteractionScores_RF$Avg_Zscore_K <- NA - InteractionScores_RF$Raw_Shift_r <- NA - InteractionScores_RF$Z_Shift_r <- NA - InteractionScores_RF$lm_Score_r <- NA - InteractionScores_RF$Z_lm_r <- NA - InteractionScores_RF$R_Squared_r <- NA - InteractionScores_RF$Sum_Z_Score_r <- NA - InteractionScores_RF$Avg_Zscore_r <- NA - InteractionScores_RF$Raw_Shift_AUC <- NA - InteractionScores_RF$Z_Shift_AUC <- NA - InteractionScores_RF$lm_Score_AUC <- NA - InteractionScores_RF$Z_lm_AUC <- NA - InteractionScores_RF$R_Squared_AUC <- NA - InteractionScores_RF$Sum_Z_Score_AUC <- NA - InteractionScores_RF$Avg_Zscore_AUC <- NA - InteractionScores_RF$NG <- NA - InteractionScores_RF$SM <- NA - - - for(i in 1:num_genes_RF){ - #get each deletion strain ORF - Gene_Sel <- unique(X2_RF$OrfRep)[i] - #extract only the current deletion strain and its data - X_Gene_Sel <- X2_RF[X2_RF$OrfRep == Gene_Sel,] - - X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm = TRUE), - median_L = median(l,na.rm = TRUE), - sd_L = sd(l,na.rm = TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm = TRUE), - median_K = median(K,na.rm = TRUE), - sd_K = sd(K,na.rm = TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm = TRUE), - median_r = median(r,na.rm = TRUE), - sd_r = sd(r,na.rm = TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm = TRUE), - median_AUC = median(AUC,na.rm = TRUE), - sd_AUC = sd(AUC,na.rm = TRUE), - se_AUC = sd_AUC / sqrt(N-1), - NG = sum(NG,na.rm=TRUE), - DB= sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - #Get shift vals - #if L is 0, that means the no growth on no drug - #if L is NA at 0, that means the spot was removed due to contamination - #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift - #otherwise calculate shift at no drug conc - if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){ - X_stats_interaction$Raw_Shift_L <- 0 - X_stats_interaction$Raw_Shift_K <- 0 - X_stats_interaction$Raw_Shift_r <- 0 - X_stats_interaction$Raw_Shift_AUC <- 0 - X_stats_interaction$Z_Shift_L <- 0 - X_stats_interaction$Z_Shift_K <- 0 - X_stats_interaction$Z_Shift_r <- 0 - X_stats_interaction$Z_Shift_AUC <- 0 - }else{ - X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L - X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K - X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r - X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC - X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1] - X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1] - X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1] - X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1] - } - - - #get WT vals - X_stats_interaction$WT_l <- X_stats_BY_L$mean - X_stats_interaction$WT_K <- X_stats_BY_K$mean - X_stats_interaction$WT_r <- X_stats_BY_r$mean - X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean - - #Get WT SD - X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd - X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd - X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd - X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd - - - #only get scores if there's growth at no drug - if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for no growth values in Z score calculation - if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #get linear model - gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction) - - #get interaction score calculated by linear model and R-squared value for the fit - gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] - r_squared_l <- summary(gene_lm_L)$r.squared - gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] - r_squared_K <- summary(gene_lm_K)$r.squared - gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] - r_squared_r <- summary(gene_lm_r)$r.squared - gene_interaction_AUC <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_AUC$coefficients[1] - r_squared_AUC <- summary(gene_lm_AUC)$r.squared - - #Get total of non removed values - Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1 - - #report the scores - InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] - InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] - - InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L - InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc) - - - InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K - InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc) - - InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r - InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1) - - InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC - InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1) - } - - if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for missing values in Z score calculatiom - if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #NA values for the next part since there's an NA or 0 at the no drug. - gene_lm_L <- NA - gene_lm_K <- NA - gene_lm_r <- NA - gene_lm_AUC <- NA - - gene_interaction_L <- NA - r_squared_l <- NA - gene_interaction_K <- NA - r_squared_K <- NA - gene_interaction_r <- NA - r_squared_r <- NA - gene_interaction_AUC <- NA - r_squared_AUC <- NA - - X_stats_interaction$Raw_Shift_L <- NA - X_stats_interaction$Raw_Shift_K <- NA - X_stats_interaction$Raw_Shift_r <- NA - X_stats_interaction$Raw_Shift_AUC <- NA - - X_stats_interaction$Z_Shift_L <- NA - X_stats_interaction$Z_Shift_K <- NA - X_stats_interaction$Z_Shift_r <- NA - X_stats_interaction$Z_Shift_AUC <- NA - - InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] - InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] - - InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L - InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - - InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K - InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - - InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r - InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - - InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC - InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - } - - if(i == 1){ - X_stats_interaction_ALL_RF <- X_stats_interaction - } - if(i > 1){ - X_stats_interaction_ALL_RF <- rbind(X_stats_interaction_ALL_RF,X_stats_interaction) - } - - InteractionScores_RF$NG[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE) - InteractionScores_RF$DB[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE) - InteractionScores_RF$SM[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE) - - #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int) - - - } - print("Pass RF Calculation loop") - - lm_sd_L <- sd(InteractionScores_RF$lm_Score_L,na.rm=TRUE) - lm_sd_K <- sd(InteractionScores_RF$lm_Score_K,na.rm=TRUE) - lm_sd_r <- sd(InteractionScores_RF$lm_Score_r,na.rm=TRUE) - lm_sd_AUC <- sd(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) - - lm_mean_L <- mean(InteractionScores_RF$lm_Score_L,na.rm=TRUE) - lm_mean_K <- mean(InteractionScores_RF$lm_Score_K,na.rm=TRUE) - lm_mean_r <- mean(InteractionScores_RF$lm_Score_r,na.rm=TRUE) - lm_mean_AUC <- mean(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) - - print(paste("Mean RF linear regression score L",lm_mean_L)) - - - InteractionScores_RF$Z_lm_L <- (InteractionScores_RF$lm_Score_L - lm_mean_L)/(lm_sd_L) - InteractionScores_RF$Z_lm_K <- (InteractionScores_RF$lm_Score_K - lm_mean_K)/(lm_sd_K) - InteractionScores_RF$Z_lm_r <- (InteractionScores_RF$lm_Score_r - lm_mean_r)/(lm_sd_r) - InteractionScores_RF$Z_lm_AUC <- (InteractionScores_RF$lm_Score_AUC - lm_mean_AUC)/(lm_sd_AUC) - - - InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$Z_lm_L,decreasing=TRUE),] - InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$NG,decreasing=TRUE),] - write.csv(InteractionScores_RF,paste(outputpath,"RF_ZScores_Interaction.csv",sep=""),row.names=FALSE) - - - for(i in 1:num_genes_RF){ - Gene_Sel <- unique(InteractionScores_RF$OrfRep)[i] - X_ZCalculations <- X_stats_interaction_ALL_RF[X_stats_interaction_ALL_RF$OrfRep == Gene_Sel,] - X_Int_Scores <- InteractionScores_RF[InteractionScores_RF$OrfRep == Gene_Sel,] - - p_rf_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg Zscore =",round(X_Int_Scores$Avg_Zscore_L,2))) + - annotate("text",x=1,y=25,label = paste("lm Zscore =",round(X_Int_Scores$Z_lm_L,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_rf_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) + - annotate("text",x=1,y=25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_L,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_rf_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-0.65,0.65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) + - annotate("text",x=1,y=0.25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_r,2))) + - annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) + - theme_Publication() - - p_rf_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-6500,6500)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) + - annotate("text",x=1,y=2500,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_AUC,2))) + - annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) + - theme_Publication() - - - - if(i == 1){ - X_stats_interaction_ALL_RF_final <- X_ZCalculations - } - if(i > 1){ - X_stats_interaction_ALL_RF_final <- rbind(X_stats_interaction_ALL_RF_final,X_ZCalculations) - } - } - print("Pass RF ggplot loop") - write.csv(X_stats_interaction_ALL_RF_final,paste(outputpath,"RF_ZScore_Calculations.csv",sep=""),row.names = FALSE) - - - ####### Part 5 - Get Zscores for Gene deletion strains - - - - #get total number of genes for the next loop - num_genes <- length(unique(X2$OrfRep)) - print(num_genes) - - #create the output data.frame containing columns for each deletion strain - InteractionScores <- unique(X2["OrfRep"]) - #InteractionScores$Gene <- unique(X2$Gene) - InteractionScores$Gene <- NA - InteractionScores$Raw_Shift_L <- NA - InteractionScores$Z_Shift_L <- NA - InteractionScores$lm_Score_L <- NA - InteractionScores$Z_lm_L <- NA - InteractionScores$R_Squared_L <- NA - InteractionScores$Sum_Z_Score_L <- NA - InteractionScores$Avg_Zscore_L <- NA - InteractionScores$Raw_Shift_K <- NA - InteractionScores$Z_Shift_K <- NA - InteractionScores$lm_Score_K <- NA - InteractionScores$Z_lm_K <- NA - InteractionScores$R_Squared_K <- NA - InteractionScores$Sum_Z_Score_K <- NA - InteractionScores$Avg_Zscore_K <- NA - InteractionScores$Raw_Shift_r <- NA - InteractionScores$Z_Shift_r <- NA - InteractionScores$lm_Score_r <- NA - InteractionScores$Z_lm_r <- NA - InteractionScores$R_Squared_r <- NA - InteractionScores$Sum_Z_Score_r <- NA - InteractionScores$Avg_Zscore_r <- NA - InteractionScores$Raw_Shift_AUC <- NA - InteractionScores$Z_Shift_AUC <- NA - InteractionScores$lm_Score_AUC <- NA - InteractionScores$Z_lm_AUC <- NA - InteractionScores$R_Squared_AUC <- NA - InteractionScores$Sum_Z_Score_AUC <- NA - InteractionScores$Avg_Zscore_AUC <- NA - InteractionScores$NG <- NA - InteractionScores$DB <- NA - InteractionScores$SM <- NA - - for(i in 1:num_genes){ - #get each deletion strain ORF - Gene_Sel <- unique(X2$OrfRep)[i] - #extract only the current deletion strain and its data - X_Gene_Sel <- X2[X2$OrfRep == Gene_Sel,] - - X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm = TRUE), - median_L = median(l,na.rm = TRUE), - sd_L = sd(l,na.rm = TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm = TRUE), - median_K = median(K,na.rm = TRUE), - sd_K = sd(K,na.rm = TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm = TRUE), - median_r = median(r,na.rm = TRUE), - sd_r = sd(r,na.rm = TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm = TRUE), - median_AUC = median(AUC,na.rm = TRUE), - sd_AUC = sd(AUC,na.rm = TRUE), - se_AUC = sd_AUC / sqrt(N-1), - NG = sum(NG,na.rm=TRUE), - DB= sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - #Get shift vals - #if L is 0, that means the no growth on no drug - #if L is NA at 0, that means the spot was removed due to contamination - #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift - #otherwise calculate shift at no drug conc - if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){ - X_stats_interaction$Raw_Shift_L <- 0 - X_stats_interaction$Raw_Shift_K <- 0 - X_stats_interaction$Raw_Shift_r <- 0 - X_stats_interaction$Raw_Shift_AUC <- 0 - X_stats_interaction$Z_Shift_L <- 0 - X_stats_interaction$Z_Shift_K <- 0 - X_stats_interaction$Z_Shift_r <- 0 - X_stats_interaction$Z_Shift_AUC <- 0 - }else{ - X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L - X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K - X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r - X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC - X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1] - X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1] - X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1] - X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1] - } - - - #get WT vals - X_stats_interaction$WT_l <- X_stats_BY_L$mean - X_stats_interaction$WT_K <- X_stats_BY_K$mean - X_stats_interaction$WT_r <- X_stats_BY_r$mean - X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean - - #Get WT SD - X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd - X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd - X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd - X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd - - - #only get scores if there's growth at no drug - if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for no growth values in Z score calculation - if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #get linear model - gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction) - - #get interaction score calculated by linear model and R-squared value for the fit - gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] - r_squared_l <- summary(gene_lm_L)$r.squared - gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] - r_squared_K <- summary(gene_lm_K)$r.squared - gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] - r_squared_r <- summary(gene_lm_r)$r.squared - gene_interaction_AUC <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_AUC$coefficients[1] - r_squared_AUC <- summary(gene_lm_AUC)$r.squared - - #Get total of non removed values - Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1 - - #report the scores - InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1]) - InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1]) - - InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_L - InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_L-lm_mean_L)/lm_sd_L - InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE) - InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc) - - - InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_K - InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_K-lm_mean_K)/lm_sd_K - InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE) - InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc) - - InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_r - InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_r-lm_mean_r)/lm_sd_r - InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE) - InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1) - - InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_AUC - InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_AUC-lm_mean_AUC)/lm_sd_AUC - InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE) - InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1) - } - - if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for missing values in Z score calculatiom - if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #NA values for the next part since there's an NA or 0 at the no drug. - gene_lm_L <- NA - gene_lm_K <- NA - gene_lm_r <- NA - - gene_interaction_L <- NA - r_squared_l <- NA - gene_interaction_K <- NA - r_squared_K <- NA - gene_interaction_r <- NA - r_squared_r <- NA - - X_stats_interaction$Raw_Shift_L <- NA - X_stats_interaction$Raw_Shift_K <- NA - X_stats_interaction$Raw_Shift_r <- NA - X_stats_interaction$Raw_Shift_AUC <- NA - - X_stats_interaction$Z_Shift_L <- NA - X_stats_interaction$Z_Shift_K <- NA - X_stats_interaction$Z_Shift_r <- NA - X_stats_interaction$Z_Shift_AUC <- NA - - InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1]) - InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1]) - - InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- NA - - InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <-NA - InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- NA - - InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- NA - - InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - } - - if(i == 1){ - X_stats_interaction_ALL <- X_stats_interaction - } - if(i > 1){ - X_stats_interaction_ALL <- rbind(X_stats_interaction_ALL,X_stats_interaction) - } - - InteractionScores$NG[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE) - InteractionScores$DB[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE) - InteractionScores$SM[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE) - - #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int) - - - } - print("Pass Int Calculation loop") - InteractionScores <- InteractionScores[order(InteractionScores$Z_lm_L,decreasing=TRUE),] - InteractionScores <- InteractionScores[order(InteractionScores$NG,decreasing=TRUE),] - df_order_by_OrfRep <- unique(InteractionScores$OrfRep) - #X_stats_interaction_ALL <- X_stats_interaction_ALL[order(X_stats_interaction_ALL$NG,decreasing=TRUE),] - write.csv(InteractionScores,paste(outputpath,"ZScores_Interaction.csv",sep=""),row.names=FALSE) - - InteractionScores_deletion_enhancers_L <- InteractionScores[InteractionScores$Avg_Zscore_L >= 2,] - InteractionScores_deletion_enhancers_K <- InteractionScores[InteractionScores$Avg_Zscore_K <= -2,] - InteractionScores_deletion_suppressors_L <- InteractionScores[InteractionScores$Avg_Zscore_L <= -2,] - InteractionScores_deletion_suppressors_K <- InteractionScores[InteractionScores$Avg_Zscore_K >= 2,] - InteractionScores_deletion_enhancers_and_Suppressors_L <- InteractionScores[InteractionScores$Avg_Zscore_L >= 2 | InteractionScores$Avg_Zscore_L <= -2,] - InteractionScores_deletion_enhancers_and_Suppressors_K <- InteractionScores[InteractionScores$Avg_Zscore_K >= 2 | InteractionScores$Avg_Zscore_K <= -2,] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- InteractionScores[InteractionScores$Z_lm_L >= 2 & InteractionScores$Avg_Zscore_L <= -2,] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- InteractionScores[InteractionScores$Z_lm_L <= -2 & InteractionScores$Avg_Zscore_L >= 2,] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- InteractionScores[InteractionScores$Z_lm_K <= -2 & InteractionScores$Avg_Zscore_K >= 2,] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- InteractionScores[InteractionScores$Z_lm_K >= 2 & InteractionScores$Avg_Zscore_K <= -2,] - - InteractionScores_deletion_enhancers_L <- InteractionScores_deletion_enhancers_L[!is.na(InteractionScores_deletion_enhancers_L$OrfRep),] - InteractionScores_deletion_enhancers_K <- InteractionScores_deletion_enhancers_K[!is.na(InteractionScores_deletion_enhancers_K$OrfRep),] - InteractionScores_deletion_suppressors_L <- InteractionScores_deletion_suppressors_L[!is.na(InteractionScores_deletion_suppressors_L$OrfRep),] - InteractionScores_deletion_suppressors_K <- InteractionScores_deletion_suppressors_K[!is.na(InteractionScores_deletion_suppressors_K$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_L <- InteractionScores_deletion_enhancers_and_Suppressors_L[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_K <- InteractionScores_deletion_enhancers_and_Suppressors_K[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K$OrfRep),] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L[!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L$OrfRep),] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L[!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L$OrfRep),] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K[!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K$OrfRep),] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K[!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K$OrfRep),] - - write.csv(InteractionScores_deletion_enhancers_L,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_K,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_L,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_K,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L,paste(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L,paste(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K,paste(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K,paste(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv",sep=""),row.names=FALSE) - - #get enhancers and suppressors for linear regression - InteractionScores_deletion_enhancers_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2,] - InteractionScores_deletion_enhancers_K_lm <- InteractionScores[InteractionScores$Z_lm_K <= -2,] - InteractionScores_deletion_suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L <= -2,] - InteractionScores_deletion_suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2,] - InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2 | InteractionScores$Z_lm_L <= -2,] - InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2 | InteractionScores$Z_lm_K <= -2,] - - InteractionScores_deletion_enhancers_L_lm <- InteractionScores_deletion_enhancers_L_lm[!is.na(InteractionScores_deletion_enhancers_L_lm$OrfRep),] - InteractionScores_deletion_enhancers_K_lm <- InteractionScores_deletion_enhancers_K_lm[!is.na(InteractionScores_deletion_enhancers_K_lm$OrfRep),] - InteractionScores_deletion_suppressors_L_lm <- InteractionScores_deletion_suppressors_L_lm[!is.na(InteractionScores_deletion_suppressors_L_lm$OrfRep),] - InteractionScores_deletion_suppressors_K_lm <- InteractionScores_deletion_suppressors_K_lm[!is.na(InteractionScores_deletion_suppressors_K_lm$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores_deletion_enhancers_and_Suppressors_L_lm[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L_lm$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores_deletion_enhancers_and_Suppressors_K_lm[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K_lm$OrfRep),] - - write.csv(InteractionScores_deletion_enhancers_L_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_L_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_K_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_K_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_L_lm,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_L_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_K_lm,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_K_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv",sep=""),row.names=FALSE) - - - write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) - print('ln 1570 write StudyInfo.csv after ') - #write.table(Labels,file=paste("../Code/StudyInfo.txt"),sep="\t",row.names = FALSE) - - for(i in 1:num_genes){ - Gene_Sel <- unique(InteractionScores$OrfRep)[i] - X_ZCalculations <- X_stats_interaction_ALL[X_stats_interaction_ALL$OrfRep == Gene_Sel,] - X_Int_Scores <- InteractionScores[InteractionScores$OrfRep == Gene_Sel,] - - p_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_L,2))) + - annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_L,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) + - annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_K,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-0.65,0.65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) + - annotate("text",x=1,y=0.25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_r,2))) + - annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) + - theme_Publication() - - p_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-6500,6500)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) + - annotate("text",x=1,y=2500,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_AUC,2))) + - annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) + - theme_Publication() - - - - if(i == 1){ - X_stats_interaction_ALL_final <- X_ZCalculations - } - if(i > 1){ - X_stats_interaction_ALL_final <- rbind(X_stats_interaction_ALL_final,X_ZCalculations) - } - } - print("Pass Int ggplot loop") - write.csv(X_stats_interaction_ALL_final,paste(outputpath,"ZScore_Calculations.csv",sep=""),row.names = FALSE) - - - - - - - Blank <- ggplot(X2_RF) + geom_blank() - - pdf(paste(outputpath,"InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE) - - X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise, - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_AUC = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - NG = sum(NG,na.rm=TRUE), - DB = sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - L_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,l)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + - annotate("text",x=-0.25,y=10,label="NG") + - annotate("text",x=-0.25,y=5,label="DB") + - annotate("text",x=-0.25,y=0,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=0,label=X_stats_X2_RF$SM) + - theme_Publication() - - K_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,K)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(-20,160)) + - annotate("text",x=-0.25,y=-5,label="NG") + - annotate("text",x=-0.25,y=-12.5,label="DB") + - annotate("text",x=-0.25,y=-20,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-5,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-12.5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-20,label=X_stats_X2_RF$SM) + - theme_Publication() - - R_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,r)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - annotate("text",x=-0.25,y=.9,label="NG") + - annotate("text",x=-0.25,y=.8,label="DB") + - annotate("text",x=-0.25,y=.7,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.9,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.8,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.7,label=X_stats_X2_RF$SM) + - theme_Publication() - - AUC_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,AUC)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + - annotate("text",x=-0.25,y=11000,label="NG") + - annotate("text",x=-0.25,y=10000,label="DB") + - annotate("text",x=-0.25,y=9000,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=11000,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10000,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=9000,label=X_stats_X2_RF$SM) + - theme_Publication() - - - L_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + - theme_Publication() - - K_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + - theme_Publication() - - r_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - theme_Publication() - - AUC_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + - theme_Publication() - - - grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2) - grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2) - - - - #plot the references - #grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4) - #grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2) - - #loop for grid arrange 4x3 - j <- rep(1,((num_genes)/3)-1) - for(n in 1:length(j)){ - j[n+1] <- n*3 + 1 - } - #loop for printing each plot - num <- 0 - for(m in 1:(round((num_genes)/3)-1)){ - num <- j[m] - grid.arrange(p_l[[num]],p_K[[num]],p_r[[num]],p_AUC[[num]],p_l[[num+1]],p_K[[num+1]],p_r[[num+1]],p_AUC[[num+1]],p_l[[num+2]],p_K[[num+2]],p_r[[num+2]],p_AUC[[num+2]],ncol=4,nrow=3) - #grid.arrange(p_l[[364]],p_K[[364]],p_r[[364]],p_l[[365]],p_K[[365]],p_r[[365]],p_l[[366]],p_K[[366]],p_r[[366]],ncol=3,nrow=3) - - - #p1[[num+3]],p_K[[num+3]],p_r[[num+3]],p1[[num+4]],p_K[[num+4]],p_r[[num+4]] - } - if(num_genes != (num+2)){ - total_num = num_genes - (num+2) - if(total_num == 5){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_AUC[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],p_AUC[[num+7]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - if(total_num == 4){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_AUC[[num+6]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - - if(total_num == 3){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3) - #grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 2){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 1){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - } - dev.off() - - - - pdf(paste(outputpath,"RF_InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE) - - X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise, - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_AUC = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - NG = sum(NG,na.rm=TRUE), - DB = sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - L_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,l)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + - annotate("text",x=-0.25,y=10,label="NG") + - annotate("text",x=-0.25,y=5,label="DB") + - annotate("text",x=-0.25,y=0,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=0,label=X_stats_X2_RF$SM) + - theme_Publication() - - K_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,K)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(-20,160)) + - annotate("text",x=-0.25,y=-5,label="NG") + - annotate("text",x=-0.25,y=-12.5,label="DB") + - annotate("text",x=-0.25,y=-20,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-5,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-12.5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-20,label=X_stats_X2_RF$SM) + - theme_Publication() - - R_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,r)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - annotate("text",x=-0.25,y=.9,label="NG") + - annotate("text",x=-0.25,y=.8,label="DB") + - annotate("text",x=-0.25,y=.7,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.9,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.8,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.7,label=X_stats_X2_RF$SM) + - theme_Publication() - - AUC_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,AUC)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + - annotate("text",x=-0.25,y=11000,label="NG") + - annotate("text",x=-0.25,y=10000,label="DB") + - annotate("text",x=-0.25,y=9000,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=11000,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10000,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=9000,label=X_stats_X2_RF$SM) + - theme_Publication() - - - L_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + - theme_Publication() - - K_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + - theme_Publication() - - r_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - theme_Publication() - - AUC_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(12000,0)) + - theme_Publication() - - - grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2) - grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2) - - - - #plot the references - #grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4) - #grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2) - - #loop for grid arrange 4x3 - j <- rep(1,((num_genes_RF)/3)-1) - for(n in 1:length(j)){ - j[n+1] <- n*3 + 1 - } - #loop for printing each plot - num <- 0 - for(m in 1:(round((num_genes_RF)/3)-1)){ - num <- j[m] - grid.arrange(p_rf_l[[num]],p_rf_K[[num]],p_rf_r[[num]],p_rf_AUC[[num]],p_rf_l[[num+1]],p_rf_K[[num+1]],p_rf_r[[num+1]],p_rf_AUC[[num+1]],p_rf_l[[num+2]],p_rf_K[[num+2]],p_rf_r[[num+2]],p_rf_AUC[[num+2]],ncol=4,nrow=3) - #grid.arrange(p_rf_l[[364]],p_rf_K[[364]],p_rf_r[[364]],p_rf_l[[365]],p_rf_K[[365]],p_rf_r[[365]],p_rf_l[[366]],p_rf_K[[366]],p_rf_r[[366]],ncol=3,nrow=3) - - - #p1[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p1[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]] - } - if(num_genes_RF != (num+2)){ - total_num = num_genes_RF - (num+2) - if(total_num == 5){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_AUC[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],p_rf_AUC[[num+7]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - if(total_num == 4){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_AUC[[num+6]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - - if(total_num == 3){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3) - #grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 2){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 1){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - } - dev.off() - - #print rank plots for L and K gene interactions - - - InteractionScores_AdjustMissing <- InteractionScores - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_L),]$Avg_Zscore_L <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_K),]$Avg_Zscore_K <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_r),]$Avg_Zscore_r <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_AUC),]$Avg_Zscore_AUC <- 0.001 - - InteractionScores_AdjustMissing$L_Rank <- NA - InteractionScores_AdjustMissing$K_Rank <- NA - InteractionScores_AdjustMissing$r_Rank <- NA - InteractionScores_AdjustMissing$AUC_Rank <- NA - - InteractionScores_AdjustMissing$L_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_L) - InteractionScores_AdjustMissing$K_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_K) - InteractionScores_AdjustMissing$r_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_r) - InteractionScores_AdjustMissing$AUC_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_AUC) - - # - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_L),]$Z_lm_L <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_K),]$Z_lm_K <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_r),]$Z_lm_r <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_AUC),]$Z_lm_AUC <- 0.001 - - InteractionScores_AdjustMissing$L_Rank_lm <- NA - InteractionScores_AdjustMissing$K_Rank_lm <- NA - InteractionScores_AdjustMissing$r_Rank_lm <- NA - InteractionScores_AdjustMissing$AUC_Rank_lm <- NA - - InteractionScores_AdjustMissing$L_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_L) - InteractionScores_AdjustMissing$K_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_K) - InteractionScores_AdjustMissing$r_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_r) - InteractionScores_AdjustMissing$AUC_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_AUC) - - - - Rank_L_1SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - pdf(paste(outputpath,"RankPlots.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2) - - dev.off() - - - Rank_L_1SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - pdf(paste(outputpath,"RankPlots_lm.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2) - - dev.off() - - - - X_NArm <- InteractionScores[!is.na(InteractionScores$Z_lm_L) | !is.na(InteractionScores$Avg_Zscore_L) ,] - - #find overlaps - X_NArm$Overlap <- "No Effect" - try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Enhancer Both") - try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Suppressor Both") - try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= 2,]$Overlap <- "Deletion Enhancer lm only") - try(X_NArm[X_NArm$Z_lm_L <= 2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Enhancer Avg Zscore only") - try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= -2,]$Overlap <- "Deletion Suppressor lm only") - try(X_NArm[X_NArm$Z_lm_L >= -2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Suppressor Avg Zscore only") - try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Enhancer lm, Deletion Suppressor Avg Z score") - try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Suppressor lm, Deletion Enhancer Avg Z score") - - #get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 - get_lm_L <- lm(X_NArm$Z_lm_L~X_NArm$Avg_Zscore_L) - L_lm <- summary(get_lm_L) - - get_lm_K <- lm(X_NArm$Z_lm_K~X_NArm$Avg_Zscore_K) - K_lm <- summary(get_lm_K) - - get_lm_r <- lm(X_NArm$Z_lm_r~X_NArm$Avg_Zscore_r) - r_lm <- summary(get_lm_r) - - get_lm_AUC <- lm(X_NArm$Z_lm_AUC~X_NArm$Avg_Zscore_AUC) - AUC_lm <- summary(get_lm_AUC) - - pdf(paste(outputpath,"Avg_Zscore_vs_lm_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) - - print(ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm L") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(Avg_Zscore_K,Z_lm_K)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm K") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(K_lm$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(Avg_Zscore_r,Z_lm_r)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm r") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(r_lm$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(Avg_Zscore_AUC,Z_lm_AUC)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm AUC") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(AUC_lm$r.squared,2))) + theme_Publication_legend_right()) - - dev.off() - - - lm_v_Zscore_L <- ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L,ORF=OrfRep,Gene=Gene,NG=NG,SM=SM,DB=DB)) + geom_point(aes(color=Overlap),shape=3) + - geom_smooth(method = "lm",color=1) + ggtitle("Avg Zscore vs lm L") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right() - - pgg <- ggplotly(lm_v_Zscore_L) - #pgg - plotly_path <- paste(getwd(),"/",outputpath,"Avg_Zscore_vs_lm_NA_rm.html",sep="") - saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - X_NArm$L_Rank <- rank(X_NArm$Avg_Zscore_L) - X_NArm$K_Rank <- rank(X_NArm$Avg_Zscore_K) - X_NArm$r_Rank <- rank(X_NArm$Avg_Zscore_r) - X_NArm$AUC_Rank <- rank(X_NArm$Avg_Zscore_AUC) - - X_NArm$L_Rank_lm <- rank(X_NArm$Z_lm_L) - X_NArm$K_Rank_lm <- rank(X_NArm$Z_lm_K) - X_NArm$r_Rank_lm <- rank(X_NArm$Z_lm_r) - X_NArm$AUC_Rank_lm <- rank(X_NArm$Z_lm_AUC) - - #get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 - get_lm_L2 <- lm(X_NArm$L_Rank_lm~X_NArm$L_Rank) - L_lm2 <- summary(get_lm_L2) - - get_lm_K2 <- lm(X_NArm$K_Rank_lm~X_NArm$K_Rank) - K_lm2 <- summary(get_lm_K2) - - get_lm_r2 <- lm(X_NArm$r_Rank_lm~X_NArm$r_Rank) - r_lm2 <- summary(get_lm_r2) - - get_lm_AUC2 <- lm(X_NArm$AUC_Rank_lm~X_NArm$AUC_Rank) - AUC_lm2 <- summary(get_lm_AUC2) - - num_genes_NArm2 <- (dim(X_NArm)[1])/2 - - pdf(paste(outputpath,"Avg_Zscore_vs_lm_ranked_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) - - print(ggplot(X_NArm,aes(L_Rank,L_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank Avg Zscore vs lm L") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(L_lm2$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(K_Rank,K_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank Avg Zscore vs lm K") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(K_lm2$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(r_Rank,r_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank Avg Zscore vs lm r") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(r_lm2$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(AUC_Rank,AUC_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank of Avg Zscore vs lm AUC") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(AUC_lm2$r.squared,2))) + theme_Publication_legend_right()) - - - - dev.off() - - - - Rank_L_1SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - pdf(paste(outputpath,"RankPlots_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2) - - dev.off() - - - Rank_L_1SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - pdf(paste(outputpath,"RankPlots_lm_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2) - - dev.off() - -} - - - -#get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 -get_lm_1 <- lm(X_NArm$Z_lm_K~X_NArm$Z_lm_L) -L_lm_1 <- summary(get_lm_1) - -get_lm_2 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_L) -L_lm_2 <- summary(get_lm_2) - -get_lm_3 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_L) -L_lm_3 <- summary(get_lm_3) - -get_lm_4 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_K) -L_lm_4 <- summary(get_lm_4) - -get_lm_5 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_K) -L_lm_5 <- summary(get_lm_5) - -get_lm_6 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_r) -L_lm_6 <- summary(get_lm_6) - - -pdf(file=paste(outputpath,"Correlation_CPPs.pdf",sep=""),width = 10, height = 7, onefile = TRUE) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_K)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction L vs. Interaction K") + - xlab("z-score L") + ylab("z-score K") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_1$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction L vs. Interaction r") + - xlab("z-score L") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_2$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction L vs. Interaction AUC") + - xlab("z-score L") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_3$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction K vs. Interaction r") + - xlab("z-score K") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_4$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction K vs. Interaction AUC") + - xlab("z-score K") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_5$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_r,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction r vs. Interaction AUC") + - xlab("z-score r") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_6$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -InteractionScores_RF2 <- InteractionScores_RF[!is.na(InteractionScores_RF$Z_lm_L),] -ggplot(X_NArm,aes(Z_lm_L,Z_lm_K)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_K),color="cyan") + - ggtitle("Interaction L vs. Interaction K") + - xlab("z-score L") + ylab("z-score K") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_1$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_r),color="cyan") + - ggtitle("Interaction L vs. Interaction r") + - xlab("z-score L") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_2$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_AUC),color="cyan") + - ggtitle("Interaction L vs. Interaction AUC") + - xlab("z-score L") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_3$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_K,Z_lm_r),color="cyan") + - ggtitle("Interaction K vs. Interaction r") + - xlab("z-score K") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_4$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_K,Z_lm_AUC),color="cyan") + - ggtitle("Interaction K vs. Interaction AUC") + - xlab("z-score K") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_5$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_r,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_r,Z_lm_AUC),color="cyan") + - ggtitle("Interaction r vs. Interaction AUC") + - xlab("z-score r") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_6$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - - - - - -dev.off() - - -#write.csv(Labels,file=paste("../Code/Parameters.csv"),row.names = FALSE) -timestamp() - -#BoneYard*********************************************** -#I'm thinking this parameter needs to be save somewhere "permanent' for the record so outputs can be reproduced. -#take this out of the Arguments. In Matlab I could for future in .mat file. Maybe I could save the SD Args[2] as part of the StudyInfo.txt. -#Corruptable but better than nothing. -#if(is.na(Args[2])){ -# std=3 -#}else { -# std= Arg[2] -#Delta_Background_sdFactor <- 2 #Args[3] -#delBGFactor <- as.numeric(Delta_Background_sdFactor) -#} -