diff --git a/workflow/templates/qhtcp/Code/GTAtemplate.R b/workflow/templates/qhtcp/Code/GTAtemplate.R index 71e4a2c7..06a815fc 100644 --- a/workflow/templates/qhtcp/Code/GTAtemplate.R +++ b/workflow/templates/qhtcp/Code/GTAtemplate.R @@ -1,27 +1,27 @@ #GTA (GoTermAveraging) Starting (Working Directory is /Code) All paths relative to /Code #Your output may not be reproducible as org.Sc.sgd.db is uploaded from Bioconductor R library and changes #Loops thru the number of experiments involved in study. JWR - Wstudy= getwd() - if (file.exists('../Exp1/ZScores/ZScores_Interaction.csv')){ - inputFile <- '../Exp1/ZScores/ZScores_Interaction.csv' - expName= "Exp1" - dir.create("../GTAresults/Exp1") - } - if (file.exists('../Exp2/ZScores/ZScores_Interaction.csv')){ - inputFile[2] <- '../Exp2/ZScores/ZScores_Interaction.csv' - expName[2]= "Exp2" - dir.create("../GTAresults/Exp2") - } - if (file.exists('../Exp3/ZScores/ZScores_Interaction.csv')){ - inputFile[3] <- '../Exp3/ZScores/ZScores_Interaction.csv' - expName[3]= "Exp3" - dir.create("../GTAresults/Exp3") - } - if (file.exists('../Exp4/ZScores/ZScores_Interaction.csv')){ - inputFile[4] <- '../Exp4/ZScores/ZScores_Interaction.csv' - expName[4]= "Exp4" - dir.create("../GTAresults/Exp4") - } +Wstudy= getwd() +if (file.exists('../Exp1/ZScores/ZScores_Interaction.csv')){ + inputFile <- '../Exp1/ZScores/ZScores_Interaction.csv' + expName= "Exp1" + dir.create("../GTAresults/Exp1") +} +if (file.exists('../Exp2/ZScores/ZScores_Interaction.csv')){ + inputFile[2] <- '../Exp2/ZScores/ZScores_Interaction.csv' + expName[2]= "Exp2" + dir.create("../GTAresults/Exp2") +} +if (file.exists('../Exp3/ZScores/ZScores_Interaction.csv')){ + inputFile[3] <- '../Exp3/ZScores/ZScores_Interaction.csv' + expName[3]= "Exp3" + dir.create("../GTAresults/Exp3") +} +if (file.exists('../Exp4/ZScores/ZScores_Interaction.csv')){ + inputFile[4] <- '../Exp4/ZScores/ZScores_Interaction.csv' + expName[4]= "Exp4" + dir.create("../GTAresults/Exp4") +} outputPathGTA= "../GTAresults" #dir.create(outPathGTA) diff --git a/workflow/templates/qhtcp/Code/StudyInfo.csv b/workflow/templates/qhtcp/Code/StudyInfo.csv index 63694f8b..70226305 100644 --- a/workflow/templates/qhtcp/Code/StudyInfo.csv +++ b/workflow/templates/qhtcp/Code/StudyInfo.csv @@ -2,4 +2,3 @@ ExpNumb,ExpLabel,BackgroundSD,ZscoreJoinSD,AnalysisBy 1,ExpName1,NA,NA,UserInitials 2,ExpName2,NA,NA,UserInitials 3,ExpName3,NA,NA,UserInitials -4,ExpName4,NA,NA,UserInitials \ No newline at end of file diff --git a/workflow/templates/qhtcp/Documentation/.~lock.QHTCP Analysis SystemRev2c.docx# b/workflow/templates/qhtcp/Documentation/.~lock.QHTCP Analysis SystemRev2c.docx# deleted file mode 100644 index d49970a5..00000000 --- a/workflow/templates/qhtcp/Documentation/.~lock.QHTCP Analysis SystemRev2c.docx# +++ /dev/null @@ -1 +0,0 @@ -,jwrodger,hartmanlab.genetics.uab.edu,24.05.2023 07:56,file:///home/jwrodger/.config/libreoffice/4; \ No newline at end of file diff --git a/workflow/templates/qhtcp/Documentation/~$_0329_SS_Q_HTCP_Analysis_Readme.docx b/workflow/templates/qhtcp/Documentation/~$_0329_SS_Q_HTCP_Analysis_Readme.docx deleted file mode 100644 index de033665..00000000 Binary files a/workflow/templates/qhtcp/Documentation/~$_0329_SS_Q_HTCP_Analysis_Readme.docx and /dev/null differ diff --git a/workflow/templates/qhtcp/Documentation/~$ngyu_REMc_Instruction for clustering and heatmap plotting _updated2013Mar.docx b/workflow/templates/qhtcp/Documentation/~$ngyu_REMc_Instruction for clustering and heatmap plotting _updated2013Mar.docx deleted file mode 100644 index de033665..00000000 Binary files a/workflow/templates/qhtcp/Documentation/~$ngyu_REMc_Instruction for clustering and heatmap plotting _updated2013Mar.docx and /dev/null differ diff --git a/workflow/templates/qhtcp/Exp2/.DS_Store b/workflow/templates/qhtcp/Exp2/.DS_Store deleted file mode 100644 index cae6fe4d..00000000 Binary files a/workflow/templates/qhtcp/Exp2/.DS_Store and /dev/null differ diff --git a/workflow/templates/qhtcp/Exp2/ExpFrontend.m b/workflow/templates/qhtcp/Exp2/ExpFrontend.m deleted file mode 100644 index 25a6399b..00000000 --- a/workflow/templates/qhtcp/Exp2/ExpFrontend.m +++ /dev/null @@ -1,214 +0,0 @@ -%Frontend240417.m -%augmented ExpFrontend to provide a quick text archive and a robust -%database ammenable archive -%FrontEnd utility to copy source result sheet into Exp_ folders of -%StudiesQHTCP/StudyName/Exp1(2,3,4). This allow the automation of path -%capture to the StudiesDataArchieve.txt study log. -%Select, copy and Capture Study Exp_ details to study log - -%Exp meta data collection -W=pwd; -%Load results file meta data into workspace -try -ExpLabel= strcat('Exp',W(end)) -questdlg('\fontsize{20} Select the !!Results File','File Selection','OK', struct('Default','OK','Interpreter','tex')); -[resFile,resPath]= uigetfile('*.txt') -copyfile((fullfile(resPath,resFile)),fullfile(W)) -resDate= char(regexp(resFile, '(\d\d\_\d\d\d\d)|( \d\d\_\d\d\d\d|\d\d\d\d\d\d)','match')) -cd .. -Wstudy= pwd -studyDate= datetime('now'); -S.sDate(1) = {studyDate}; -if ispc - lastSep=max(strfind(Wstudy,'\')) - studyName= Wstudy((lastSep+1):end) -else - lastSep=max(strfind(Wstudy,'/')) - studyName= Wstudy((lastSep+1):end) -end - -S.sDate(1)= {studyDate}; -S.sName(1)= {studyName} -S.sPath(1)= {Wstudy} -S.ELabel(1)= {ExpLabel} -S.EresDate(1)= {resDate} -S.EresFile(1)= {resFile} -S.EresPath(1)= {resPath} - -cd .. - -fid = fopen('StudiesDataArchive.txt','a'); -fprintf(fid,'StudyDate\tStudyName\tStudyPath\tExpNum\tExpDate\tExpPath\tResultFile\n'); -fprintf(fid, '%s\t %s\t %s\t %s\t %s\t %s\t %s \n',S.sDate{1},S.sName{1},S.sPath{1},S.ELabel{1},S.EresDate{1},S.EresPath{1},S.EresFile{1}); -fclose(fid); -fclose('all'); - -catch - cd(W) - disp('Error: Unable to Execute ExpFrontend.m') -end - -cd(W) -pwd - - - -%************************************************************************************* -%************************************************************************************* -%##################################################################################### -%Improved storage ammenable for database use -%FrontEnd utility to copy source result sheet into Exp_ folders of -%StudiesQHTCP/StudyName/Exp1(2,3,4). This allow the automation of path -%capture to the StudiesDataArchieve.txt study log. -%Select, copy and Capture Study Exp_ details to study log -studyDateNow= studyDate %preserve the 'Now' date -nowNumFNm= strcat((int2str(now)),'.mat') % from previous section incase the /StudyName doesn't have a standard date -%capture the /StudiesQHTCP directory for storing log data -cd ../.. -logPath= pwd %is /.../Exp_ -cd(W) -%Try to load an existing data set from previous Frontend calls -try %end ~ln121 - load(fullfile(logPath,'.studyLog.mat')) %load(fullfile('../../','studyLog.mat')) -catch %If no studyLog.mat found [Initial First Entry] - ExpLabel= strcat('Exp',W(end)) - resDate= char(regexp(resFile, '(\d\d\_\d\d\d\d)|( \d\d\_\d\d\d\d|\d\d\d\d\d\d)','match')) - cd .. %move up to current study folder from ../Exp_ folder; - %Study meta data collection - Wstudy= pwd %Capture the specific current Study directory - try %try to extract study date from folder name - studyDate= char(regexp(Wstudy, '(\d\d\_\d\d\d\d)|( \d\d\_\d\d\d\d|\d\d\d\d\d\d)| \d\d\d\d\d\d)','match')) - S.sDate(1)= {studyDate}; - catch %if unsuccessful, accept the current date and convert to a 6 char string - studyDate= yyyymmdd(studyDateNow); - studyDate= (int2str(studyDate)) - studyDate= studyDate(3:8) - end - %Capture StudyName - if ispc - lastSep=max(strfind(Wstudy,'\')) - studyName= Wstudy((lastSep+1):end) - else - lastSep=max(strfind(Wstudy,'/')) - studyName= Wstudy((lastSep+1):end) - - end - %Put current First data entry into Structure of Save to StudyLog.mat file - S.sDate(1)= {studyDate}; - S.sName(1)= {studyName} - S.sPath(1)= {Wstudy} - S.ELabel(1)= {ExpLabel} - S.EresDate(1)= {resDate} - S.EresFile(1)= {resFile} - S.EresPath(1)= {resPath} - - cd(W) - -logFiletxt= fullfile(logPath,'StudiesDataArchive4DB.txt') %relative .txt path -%Initialize the StudiesDataArchive.txt file with the First row of meta data - -%Print to a .txt spreadsheet the first set of data in the logFiletxt file -fid = fopen(logFiletxt,'w'); -fprintf(fid,'StudyDate\tStudyName\tStudyPath\tExpNum\tExpDate\tExpPath\tResultFile\n'); -fprintf(fid, '%s\t %s\t %s\t %s\t %s\t %s\t %s \n',studyDate,studyName,Wstudy,ExpLabel,resDate,resPath,resFile); -fclose(fid); -%Save the first entry meta data into a permanent .mat files for future recall -save(fullfile(logPath,'studyLog4DB.mat'), 'S') -save(fullfile(logPath,'.studyLog.mat'), 'S') -save(fullfile(logPath,'.nowNumFNm.mat'), 'S') - - - -end %end for try for the First entry only -%++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -%++++++++END of FIRST ENTRY startup section++++++++++++++++++++++++++++++++ -%++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - - - -%Begin routine for all Entries After the Initial data entry**************** -ExpLabel= strcat('Exp',W(end)) -resDate= char(regexp(resFile, '(\d\d\_\d\d\d\d)|( \d\d\_\d\d\d\d|\d\d\d\d\d\d)','match')) %Capture date from !!Results file - -cd .. %Move to the current study folder - -%Capture study meta dat -Wstudy= pwd % Capture the path to the current study - - %Attempt to extract study date from folder name and convert to a - %searchable numeric date - studyDate= char(regexp(Wstudy, '(\d\d\_\d\d\d\d)|( \d\d\_\d\d\d\d|\d\d\d\d\d\d)| \d\d\d\d\d\d)','match')) - if ~isempty(studyDate) %If no date in folder name use current date and convert to 6char string - S.sDate(1)= {studyDate}; - if isequal((str2num(studyDate(3))),'_') %Remove '_' so that date is a number for easy DB search - studyDate= strcat(studyDate(1:2),studyDate(4:6)) - end - else - studyDate= yyyymmdd(studyDateNow); - studyDate= (int2str(studyDate)) - studyDate= studyDate(3:8) - end - -if ispc - lastSep=max(strfind(Wstudy,'\')) - studyName= Wstudy((lastSep+1):end) -else - lastSep=max(strfind(Wstudy,'/')) - studyName= Wstudy((lastSep+1):end) -end - -%Put meta data into a structure for storage (studyLog.mat) and future -%compare operations -lastRow= length(S.sPath) - -%Compare these just obtained paths with those stored in the studyLog.mat -%file and add a new row if the study or results sheet meta data is new or different -cd .. %Move the the /QHTPCstudies folder -logtxt= 'StudiesDataArchive4DB.txt' -logFiletxt= fullfile(logPath,logtxt) % -try - matched=0; - for n= 1:(lastRow) - if strcmp(Wstudy,S.sPath(n)) && strcmp(resPath, S.EresPath(n)) && strcmp(ExpLabel, S.ELabel(n)) - matched= 1; - break - end - end - - -%Update the StudiesDataArchive.txt file with a new row of meta data -if matched== 0 %if no match for this entry then add a line to the text archive file - fid = fopen(logFiletxt,'w'); - S.sDate(lastRow+1)= {studyDate}; - S.sName(lastRow+1)= {studyName}; - S.sPath(lastRow+1)= {Wstudy}; - S.ELabel(lastRow+1)= {ExpLabel}; - S.EresDate(lastRow+1)= {resDate}; - S.EresFile(lastRow+1)= {resFile}; - S.EresPath(lastRow+1)= {resPath}; - for n= 1:(lastRow +1) - %Print to a .txt spreadsheet and Save update to .mat files - if n==1 - fprintf(fid,'StudyDate\tStudyName\tStudyPath\tExpNum\tExpDate\tExpPath\tResultFile\n'); - end - fprintf(fid, '%s\t %s\t %s\t %s\t %s\t %s\t %s \n',S.sDate{n},S.sName{n},S.sPath{n},S.ELabel{n},S.EresDate{n},S.EresPath{n},S.EresFile{n}); - end - - fclose(fid); %close DBase amenable study log file after data update - - save((fullfile(logPath,'studyLog4DB.mat')), 'S') % path for studyLog.mat - save((fullfile(logPath,'.studyLog.mat')), 'S') %path backup studyLogBU.mat - save((fullfile(logPath,strcat('.',nowNumFNm))), 'S') -end -fclose('all'); - -catch - cd(W) %Return to the location of the frontend.m code (/studyName/Exp_) - fclose('all'); - clear S %clear data structure variable 'S.' -end - -cd(W) %Return to the location of the frontend.m code (/studyName/Exp_) -clear all %clear workspace variables - - diff --git a/workflow/templates/qhtcp/Exp2/ZScores/.DS_Store b/workflow/templates/qhtcp/Exp2/ZScores/.DS_Store deleted file mode 100644 index 9f5f58c5..00000000 Binary files a/workflow/templates/qhtcp/Exp2/ZScores/.DS_Store and /dev/null differ diff --git a/workflow/templates/qhtcp/Exp2/Z_InteractionTemplate.R b/workflow/templates/qhtcp/Exp2/Z_InteractionTemplate.R deleted file mode 100644 index fe31ca14..00000000 --- a/workflow/templates/qhtcp/Exp2/Z_InteractionTemplate.R +++ /dev/null @@ -1,2730 +0,0 @@ -#Based on InteractionTemplate.R which is based on Sean Santose's Interaction_V5 script. -#Adapt SS For Structured Data storage but using command line scripts -###Set up the required libraries, call required plot theme elements and set up the command line arguments -library("ggplot2") -library("plyr") -library("extrafont") -library("gridExtra") -library("gplots") -library("RColorBrewer") -library("stringr") -#library("gdata") -library(plotly) -library(htmlwidgets) - -Args <- commandArgs(TRUE) -input_file <- Args[1] #"!!Results_17_0827_yor1null-rpl12anull misLabeledAsFrom MI 17_0919_yor1-curated.txt" #Args[1] #Arg 1 #"!!ResultsStd_JS 19_1224_HLEG_P53.txt" is the !!results ... .txt -#Tool to find a file and copy it to desired location -destDir= getwd() -#srcFile= file.choose() -#file.copy(srcFile, destDir) -#input_file= tail(strsplit(srcFile,"[/]")[[1]],1) - - - -#Path to Output Directory -#W=getwd() #R is F'd up, Can't use, Any legitamate platform could build out dirs from this -outDir <- "ZScores/" #"Args[2] #paste0(W,"/ZScores/") -subDir <- outDir #Args[2] - -if (file.exists(subDir)){ - outputpath <- subDir -} else { - dir.create(file.path(subDir)) -} - -if (file.exists(paste(subDir,"QC/",sep=""))){ - outputpath_QC <- paste(subDir,"QC/",sep="") -} else { - dir.create(file.path(paste(subDir,"QC/",sep=""))) - outputpath_QC <- paste(subDir,"QC/",sep="") -} -#define the output path (formerly the second argument from Rscript) -outputpath <- outDir - -#Set Args[2] the Background contamination noise filter as a function of standard deviation -#std= as.numeric(Args[2]) -#Sean recommends 3 or 5 SD factor. -#Capture Exp_ number,use it to Save Args[2]{std}to Labels field and then Write to Labels to studyInfo.txt for future reference -Labels <- read.csv(file= "../Code/StudyInfo.csv",stringsAsFactors = FALSE) #,sep= ",") -print("Be sure to enter Background noise filter standard deviation i.e., 3 or 5 per Sean") - -#User prompt for std multiplier Value -cat("Enter a Standard Deviation value to noise filter \n") -inpChar<- readLines(file("stdin"), n = 1L) -cat(paste("Standard Deviation Value is", inpChar, "\n")) -inpNum= as.numeric(inpChar) -#set std deviation multiplier default if no user entry -if(!is.na(inpNum)){ - std= inpNum -}else{std= 3} - - -expNumber<- as.numeric(sub("^.*?(\\d+)$", "\\1", getwd())) -Labels[expNumber,3]= as.numeric(std) -Delta_Background_sdFactor <- std -DelBGFactr <- as.numeric(Delta_Background_sdFactor) -#Write Background SD value to studyInfo.txt file -#write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) -write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) -print('ln 50 write StudyInfo.csv ') -#write.table(Labels,file=paste(outputpath,"StudyInfo.txt"),sep = "\t",row.names = FALSE) - -#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -#++++++BEGIN USER DATA SELECTION SECTION+++++++++++++++++++++++++++++++++++++++++++++++++ - -#read in the data -X <- read.delim(input_file,skip=2,as.is=T,row.names=1,strip.white=TRUE) -X <- X[!(X[[1]]%in%c("","Scan")),] -#X <- X[!(X[[1]]%in%c(61:76)),] #Remove dAmp plates which are Scans 61 thru 76 - -#X <- X[which(X$Specifics == "WT"),] - -#X_length <- length(X[1,]) -#X_end <- length(X[1,]) - 2 -#X <- X[,c(1:42,X_end:X_length)] - - -#use numeric data to perform operations -X$Col <- as.numeric(X$Col) -X$Row <- as.numeric(X$Row) -X$l <- as.numeric(X$l) -X$K <- as.numeric(X$K) -X$r <- as.numeric(X$r) -X$Scan <- as.numeric(X$Scan) -X$AUC <- as.numeric(X$AUC) -X$LstBackgrd <- as.numeric(X$LstBackgrd) -X$X1stBackgrd <- as.numeric(X$X1stBackgrd) - -#Sometimes the an experimenter may have placed the non-varying drug in the 'Drug' col instead of the 'Modifier1' col -#as was the case in Gemcitabin and Cytarabin experiments. -#The following allows user to rename columns so as to get the appropriate -#data where it needs to be for the script to run properly. -#colnames(X)[7] <- "Modifier1" -#colnames(X)[8] <- "Conc1" -#colnames(X)[10] <- "Drug" -#colnames(X)[11] <- "Conc" - -#set the OrfRep to YDL227C for the ref data -X[X$ORF == "YDL227C",]$OrfRep <- "YDL227C" -#Sean removes the Doxycyclin at 0.0ug.mL so that only the Oligomycin series with Doxycyclin of 0.12ug/mL are used. -#That is the first DM plates are removed from the data set with the following. -#X <- X[X$Conc1 != "0ug/ml",] #This removes data with dox ==0 leaving gene expression on with four different concentrations of Gemcytabin -X <- X[X$Drug != "BMH21",] #This removes data concerning BMH21 for this experiment - -#Mert placed the"bad_spot" text in the ORF col. for particular spots in the RF1 and RF2 plates. -#This code removes those spots from the data set used for the interaction analysis. Dr.Hartman feels that these donot effect Zscores significantly and so "non-currated" files were used. -#try(X <- X[X$ORF != "bad_spot",]) -#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++= - -#get total number of drug concentrations -Total_Conc_Nums <- length(unique(X$Conc)) - -#function to ID numbers in string with characters+numbers (ie to get numeric drug conc) -numextract <- function(string){ - str_extract(string, "\\-*\\d+\\.*\\d*") -} - -#generate a new column with the numeric drug concs -X$Conc_Num <- as.numeric(numextract(X$Conc)) -#Generate new column with the numeric drug concs as factors starting at 0 for the graphing later -X$Conc_Num_Factor <- as.numeric(as.factor(X$Conc_Num)) - 1 - -#Get the max factor for concentration -MAX_CONC <- max(X$Conc_Num_Factor) -#if treating numbers not as factors uncomment next line and comment out previous line -#MAX_CONC <- max(X$Conc_Num) - -#remove wells with problems for making graphs and to not include in summary statistics -X <- X[X$Gene != "BLANK",] -X <- X[X$Gene != "Blank",] -X <- X[X$ORF != "Blank",] -X <- X[X$Gene != "blank",] -#X <- X[X$Gene != "HO",] -Xbu= X -#Inserted to use SGDgenelist to update orfs and replace empty geneName cells with ORF name (adapted from Sean's Merge script). This is to 'fix' the naming for everything that follows (REMc, Heatmaps ... et.al) rather than do it piece meal later -#Sean's Match Script( which was adapted here) was fixed 2022_0608 so as not to write over the RF1&RF2 geneNames which caused a variance with his code results -#in the Z_lm_L,K,r&AUC output values. Values correlated well but were off by a multiplier factor. -SGDgeneList= "../Code/SGD_features.tab" -genes = data.frame(read.delim(file=SGDgeneList,quote="",header=FALSE,colClasses = c(rep("NULL",3), rep("character", 2), rep("NULL", 11)))) -for(i in 1:length(X[,14])){ - ii= as.numeric(i) - line_num = match(X[ii,14],genes[,1],nomatch=1) - OrfRepColNum= as.numeric(match('OrfRep',names(X))) - if(X[ii,OrfRepColNum]!= "YDL227C"){ - X[ii,15] = genes[line_num,2] - } - if((X[ii,15] == "")||(X[ii,15] == "OCT1")){ - X[ii,15] = X[ii,OrfRepColNum] - } -} -Xblankreplace= X -#X= Xbu #for restore testing restore X if geneName 'Match' routine needs changing - -#Remove dAmPs ******************************* -#jlh confirmed to leave dAmps in so comment out this section -#DAmPs_List <- "../Code/22_0602_Remy_DAmPsList.txt" -#Damps <- read.delim(DAmPs_List,header=F) - -#X <- X[!(X$ORF %in% Damps$V1),] #fix this to Damps[,1] -#XafterDampsRM=X #Backup for debugging especially when Rstudio goes crazy out of control -# *********** - - -#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -#++++++END USER DATA SELECTION+++++++++++++++++++++++++++++++++++++++++++++++++ -#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -print("ln137 End of User Section including blank gene writeOver") -#++++Begin Graphics Boiler Plate Section+++++++++++++++++++++++++++++++++++++++ -#theme elements for plots -theme_Publication <- function(base_size=14, base_family="sans") { - library(grid) - library(ggthemes) - (theme_foundation(base_size=base_size, base_family=base_family) - + theme(plot.title = element_text(face = "bold", - size = rel(1.2), hjust = 0.5), - text = element_text(), - panel.background = element_rect(colour = NA), - plot.background = element_rect(colour = NA), - panel.border = element_rect(colour = NA), - axis.title = element_text(face = "bold",size = rel(1)), - axis.title.y = element_text(angle=90,vjust =2), - axis.title.x = element_text(vjust = -0.2), - axis.text = element_text(), - axis.line = element_line(colour="black"), - axis.ticks = element_line(), - panel.grid.major = element_line(colour="#f0f0f0"), - panel.grid.minor = element_blank(), - legend.key = element_rect(colour = NA), - legend.position = "bottom", - legend.direction = "horizontal", - legend.key.size= unit(0.2, "cm"), - legend.spacing = unit(0, "cm"), - legend.title = element_text(face="italic"), - plot.margin=unit(c(10,5,5,5),"mm"), - strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"), - strip.text = element_text(face="bold") - )) - -} - -scale_fill_Publication <- function(...){ - library(scales) - discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - -} - -scale_colour_Publication <- function(...){ - discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - -} - - -theme_Publication_legend_right <- function(base_size=14, base_family="sans") { - (theme_foundation(base_size=base_size, base_family=base_family) - + theme(plot.title = element_text(face = "bold", - size = rel(1.2), hjust = 0.5), - text = element_text(), - panel.background = element_rect(colour = NA), - plot.background = element_rect(colour = NA), - panel.border = element_rect(colour = NA), - axis.title = element_text(face = "bold",size = rel(1)), - axis.title.y = element_text(angle=90,vjust =2), - axis.title.x = element_text(vjust = -0.2), - axis.text = element_text(), - axis.line = element_line(colour="black"), - axis.ticks = element_line(), - panel.grid.major = element_line(colour="#f0f0f0"), - panel.grid.minor = element_blank(), - legend.key = element_rect(colour = NA), - legend.position = "right", - legend.direction = "vertical", - legend.key.size= unit(0.5, "cm"), - legend.spacing = unit(0, "cm"), - legend.title = element_text(face="italic"), - plot.margin=unit(c(10,5,5,5),"mm"), - strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"), - strip.text = element_text(face="bold") - )) - -} - -scale_fill_Publication <- function(...){ - discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - -} - -scale_colour_Publication <- function(...){ - discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - -} - - -#print timestamp for initial time the code starts -timestamp() -#+++++BEGIN QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++ -###Part 2 - Quality control -#print quality control graphs for each dataset before removing data due to contamination -#and before adjusting missing data to max theoretical values - -#plate analysis plot -#plate analysis is a quality check to identify plate effects containing anomalies - -Plate_Analysis_L <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication() - -Plate_Analysis_K <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication() - -Plate_Analysis_r <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_Publication() - -Plate_Analysis_AUC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication() - - - -Plate_Analysis_L_Box <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication() - -Plate_Analysis_K_Box <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication() - -Plate_Analysis_r_Box <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_Publication() - -Plate_Analysis_AUC_Box <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication() - -#quality control - values with a high delta background likely have heavy contamination -#check the frequency of these values -#report the L and K values of these spots -#report the number to be removed based on the Delta_Background_Tolerance -X$Delta_Backgrd <- X$LstBackgrd - X$X1stBackgrd - - -#raw l vs K before QC -Raw_l_vs_K_beforeQC <- ggplot(X,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + - ggtitle("Raw L vs K before QC") + - theme_Publication_legend_right() -pdf(paste(outputpath_QC,"Raw_L_vs_K_beforeQC.pdf",sep=""),width = 12,height = 8) -Raw_l_vs_K_beforeQC -dev.off() -pgg <- ggplotly(Raw_l_vs_K_beforeQC) -#pgg -plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_beforeQC.html",sep="") -saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - -#set delta background tolerance based on 3 sds from the mean delta background -Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(DelBGFactr*sd(X$Delta_Backgrd)) -#Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(3*sd(X$Delta_Backgrd)) -print(paste("Delta_Background_Tolerance is",Delta_Background_Tolerance,sep=" ")) - -Plate_Analysis_Delta_Backgrd <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2,position="jitter") + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_Box <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication() - - - -X_Delta_Backgrd_above_Tolerance <- X[X$Delta_Backgrd >= Delta_Background_Tolerance,] - -X_Delta_Backgrd_above_Tolerance_K_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$K,na.rm = TRUE))/2 -X_Delta_Backgrd_above_Tolerance_L_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$l,na.rm = TRUE))/2 -X_Delta_Backgrd_above_Tolerance_toRemove <- dim(X_Delta_Backgrd_above_Tolerance)[1] - -X_Delta_Backgrd_above_Tolerance_L_vs_K <- ggplot(X_Delta_Backgrd_above_Tolerance,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + - ggtitle(paste("Raw L vs K for strains above delta background threshold of",Delta_Background_Tolerance,"or above")) + - annotate("text",x=X_Delta_Backgrd_above_Tolerance_L_halfmedian,y=X_Delta_Backgrd_above_Tolerance_K_halfmedian, - label = paste("Strains above delta background tolerance = ",X_Delta_Backgrd_above_Tolerance_toRemove)) + - theme_Publication_legend_right() -pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.pdf",sep=""),width = 12,height = 8) -X_Delta_Backgrd_above_Tolerance_L_vs_K -dev.off() -pgg <- ggplotly(X_Delta_Backgrd_above_Tolerance_L_vs_K) -#pgg -plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.html",sep="") -saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - -#frequency plot for all data vs. the delta_background -DeltaBackground_Frequency_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_density() + - ggtitle("Density plot for Delta Background by Conc All Data") + theme_Publication_legend_right() - -#bar plot for all data vs. the delta_background -DeltaBackground_Bar_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_bar() + - ggtitle("Bar plot for Delta Background by Conc All Data") + theme_Publication_legend_right() - -pdf(file = paste(outputpath_QC,"Frequency_Delta_Background.pdf",sep=""),width = 12,height = 8) -print(DeltaBackground_Frequency_Plot) -print(DeltaBackground_Bar_Plot) -dev.off() - - -#Need to identify missing data, and differentiate between this data and removed data so the removed data can get set to NA and the missing data can get set to max theoretical values -#1 for missing data, 0 for non missing data -#Use "NG" for NoGrowth rather than "missing" -X$NG <- 0 -try(X[X$l == 0 & !is.na(X$l),]$NG <- 1) - -#1 for removed data, 0 non removed data -#Use DB to identify number of genes removed due to the DeltaBackground Threshold rather than "Removed" -X$DB <- 0 -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$DB <- 1) - -#replace the CPPs for l, r, AUC and K (must be last!) for removed data -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$l <- NA) -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$r <- NA) -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$AUC <- NA) -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$K <- NA) - - -Plate_Analysis_L_afterQC <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_afterQC <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_afterQC <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_afterQC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_afterQC <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - - -Plate_Analysis_L_Box_afterQC <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_Box_afterQC <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_Box_afterQC <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_Box_afterQC <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_Box_afterQC <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis.pdf",sep=""),width = 14,height=9) -Plate_Analysis_L -Plate_Analysis_L_afterQC -Plate_Analysis_K -Plate_Analysis_K_afterQC -Plate_Analysis_r -Plate_Analysis_r_afterQC -Plate_Analysis_AUC -Plate_Analysis_AUC_afterQC -Plate_Analysis_Delta_Backgrd -Plate_Analysis_Delta_Backgrd_afterQC -dev.off() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis_Boxplots.pdf",sep=""),width = 18,height=9) -Plate_Analysis_L_Box -Plate_Analysis_L_Box_afterQC -Plate_Analysis_K_Box -Plate_Analysis_K_Box_afterQC -Plate_Analysis_r_Box -Plate_Analysis_r_Box_afterQC -Plate_Analysis_AUC_Box -Plate_Analysis_AUC_Box_afterQC -Plate_Analysis_Delta_Backgrd_Box -Plate_Analysis_Delta_Backgrd_Box_afterQC -dev.off() - -#remove the zero values and print plate analysis -X_noZero <- X[which(X$l > 0),] -Plate_Analysis_L_afterQC_Z <- ggplot(X_noZero,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_afterQC_Z <- ggplot(X_noZero,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_afterQC_Z <- ggplot(X_noZero,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_afterQC_Z <- ggplot(X_noZero,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_afterQC_Z <- ggplot(X_noZero,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - - -Plate_Analysis_L_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros.pdf",sep=""),width = 14,height=9) -Plate_Analysis_L_afterQC_Z -Plate_Analysis_K_afterQC_Z -Plate_Analysis_r_afterQC_Z -Plate_Analysis_AUC_afterQC_Z -Plate_Analysis_Delta_Backgrd_afterQC_Z -dev.off() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros_Boxplots.pdf",sep=""),width = 18,height=9) -Plate_Analysis_L_Box_afterQC_Z -Plate_Analysis_K_Box_afterQC_Z -Plate_Analysis_r_Box_afterQC_Z -Plate_Analysis_AUC_Box_afterQC_Z -Plate_Analysis_Delta_Backgrd_Box_afterQC_Z -dev.off() - -#remove dataset with zeros removed -rm(X_noZero) - - -#X_test_missing_and_removed <- X[X$Removed == 1,] - -#calculate summary statistics for all strains, including both background and the deletions -X_stats_ALL <- ddply(X, c("Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_AUC = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - se_AUC = sd_AUC / sqrt(N-1) -) -#print(X_stats_ALL_L) - -write.csv(X_stats_ALL,file=paste(outputpath,"SummaryStats_ALLSTRAINS.csv"),row.names = FALSE) -#+++++END QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -##### Part 3 - Generate summary statistics and calculate the max theoretical L value -##### Calculate the Z score at each drug conc for each deletion strain - - -#get the background strains - can be modified to take another argument but for most screens will just be YDL227C -Background_Strains <- c("YDL227C") - -#first part of loop will go through for each background strain -#most cases there will only be one YDL227C -for(s in Background_Strains){ - X_Background <- X[X$OrfRep == s,] - - #if there's missing data for the background strains set these values to NA so the 0 values aren't included in summary statistics - #we may want to consider in some cases giving the max high value to L depending on the data type - if(table(X_Background$l)[1] == 0){ - X_Background[X_Background$l == 0,]$l <- NA - X_Background[X_Background$K == 0,]$K <- NA - X_Background[X_Background$r == 0,]$r <- NA - X_Background[X_Background$AUC == 0,]$AUC <- NA - } - - X_Background <- X_Background[!is.na(X_Background$l),] - - #get summary stats for L, K, R, AUC - X_stats_BY_L <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean = mean(l,na.rm=TRUE), - median = median(l,na.rm=TRUE), - max = max(l,na.rm=TRUE), - min = min(l,na.rm=TRUE), - sd = sd(l,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - print(X_stats_BY_L) - X1_SD <- max(X_stats_BY_L$sd) - - X_stats_BY_K <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(K)), - mean = mean(K,na.rm=TRUE), - median = median(K,na.rm=TRUE), - max = max(K,na.rm=TRUE), - min = min(K,na.rm=TRUE), - sd = sd(K,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - - X1_SD_K <- max(X_stats_BY_K$sd) - - - X_stats_BY_r <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = length(r), - mean = mean(r,na.rm=TRUE), - median = median(r,na.rm=TRUE), - max = max(r,na.rm=TRUE), - min = min(r,na.rm=TRUE), - sd = sd(r,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - - X1_SD_r <- max(X_stats_BY_r$sd) - - X_stats_BY_AUC <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = length(AUC), - mean = mean(AUC,na.rm=TRUE), - median = median(AUC,na.rm=TRUE), - max = max(AUC,na.rm=TRUE), - min = min(AUC,na.rm=TRUE), - sd = sd(AUC,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - - X1_SD_AUC <- max(X_stats_BY_AUC$sd) - - X_stats_BY <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_L = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - se_AUC = sd_AUC / sqrt(N-1) - ) - - write.csv(X_stats_BY,file=paste(outputpath,"SummaryStats_BackgroundStrains.csv"),row.names=FALSE) - - #calculate the max theoretical L values - #only look for max values when K is within 2SD of the ref strain - for(q in unique(X$Conc_Num_Factor)){ - if(q == 0){ - X_within_2SD_K <- X[X$Conc_Num_Factor == q,] - X_within_2SD_K <- X_within_2SD_K[!is.na(X_within_2SD_K$l),] - X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) ,] - X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - - X_outside_2SD_K <- X[X$Conc_Num_Factor == q,] - X_outside_2SD_K <- X_outside_2SD_K[!is.na(X_outside_2SD_K$l),] - #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - #X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - - } - if(q > 0){ - X_within_2SD_K_temp <- X[X$Conc_Num_Factor == q,] - X_within_2SD_K_temp <- X_within_2SD_K_temp[!is.na(X_within_2SD_K_temp$l),] - X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])),] - X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])),] - X_within_2SD_K <- rbind(X_within_2SD_K,X_within_2SD_K_temp) - - X_outside_2SD_K_temp <- X[X$Conc_Num_Factor == q,] - X_outside_2SD_K_temp <- X_outside_2SD_K_temp[!is.na(X_outside_2SD_K_temp$l),] - #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - #X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - X_outside_2SD_K <- rbind(X_outside_2SD_K,X_outside_2SD_K_temp) - } - } - - X_stats_BY_L_within_2SD_K <- ddply(X_within_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean = mean(l), - median = median(l), - max = max(l,na.rm=TRUE), - min = min(l,na.rm=TRUE), - sd = sd(l), - se = sd / sqrt(N-1), - z_max = (max-mean)/sd - ) - print(X_stats_BY_L_within_2SD_K) - X1_SD_within_2SD_K <- max(X_stats_BY_L_within_2SD_K$sd) - write.csv(X_stats_BY_L_within_2SD_K,file=paste(outputpath_QC,"Max_Observed_L_Vals_for_spots_within_2SD_K.csv",sep=""),row.names=FALSE) - - X_stats_BY_L_outside_2SD_K <- ddply(X_outside_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean = mean(l), - median = median(l), - max = max(l,na.rm=TRUE), - min = min(l,na.rm=TRUE), - sd = sd(l), - se = sd / sqrt(N-1) - ) - print(X_stats_BY_L_outside_2SD_K) - X1_SD_outside_2SD_K <- max(X_stats_BY_L_outside_2SD_K$sd) - - #X1_SD_outside_2SD_K <- X[X$l %in% X1_SD_within_2SD_K$l,] - Outside_2SD_K_L_vs_K <- ggplot(X_outside_2SD_K,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + - ggtitle("Raw L vs K for strains falling outside 2SD of the K mean at each conc") + theme_Publication_legend_right() - pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8) - print(Outside_2SD_K_L_vs_K) - dev.off() - pgg <- ggplotly(Outside_2SD_K_L_vs_K) - plotly_path <- paste(getwd(),"/",outputpath_QC,"RawL_vs_K_for_strains_outside_2SD_K.html",sep="") - saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - - Outside_2SD_K_delta_background_vs_K <- ggplot(X_outside_2SD_K,aes(Delta_Backgrd,K,color=as.factor(Conc_Num))) + geom_point(aes(l=l,ORF=ORF,Gene=Gene),shape=3,position="jitter") + - ggtitle("DeltaBackground vs K for strains falling outside 2SD of the K mean at each conc") + theme_Publication_legend_right() - pdf(paste(outputpath_QC,"DeltaBackground_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8) - Outside_2SD_K_delta_background_vs_K - dev.off() - pgg <- ggplotly(Outside_2SD_K_delta_background_vs_K) - #pgg - plotly_path <- paste(getwd(),"/",outputpath_QC,"DeltaBackground_vs_K_for_strains_outside_2SD_K.html",sep="") - saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - - #get the background strain mean values at the no drug conc to calculate shift - Background_L <- X_stats_BY_L$mean[1] - Background_K <- X_stats_BY_K$mean[1] - Background_r <- X_stats_BY_r$mean[1] - Background_AUC <- X_stats_BY_AUC$mean[1] - - #create empty plots for plotting element - p_l <- ggplot() - p_K <- ggplot() - p_r <- ggplot() - p_AUC <- ggplot() - - p_rf_l <- ggplot() - p_rf_K <- ggplot() - p_rf_r <- ggplot() - p_rf_AUC <- ggplot() - - #get only the deletion strains - X2 <- X - X2 <- X2[X2$OrfRep != "YDL227C",] - - #if set to max theoretical value, add a 1 to SM, if not, leave as 0 - #SM = Set to Max - X2$SM <- 0 - #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min - for(i in 1:length(unique(X2$Conc_Num))){ - Concentration <- unique(X2$Conc_Num)[i] - X2_temp <- X2[X2$Conc_Num == Concentration,] - if(Concentration == 0){ - X2_new <- X2_temp - print(paste("Check loop order, conc =",Concentration,sep=" ")) - } - if(Concentration > 0){ - try(X2_temp[X2_temp$l == 0 & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$SM <- 1) - try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - - #X2_temp[X2_temp$K == 0,]$K <- X_stats_ALL_K$max[i] - #X2_temp[X2_temp$r == 0,]$r <- X_stats_ALL_r$max[i] - #X2_temp[X2_temp$AUC == 0,]$AUC <- X_stats_ALL_AUC$max[i] - print(paste("Check loop order, conc =",Concentration,sep=" ")) - - X2_new <- rbind(X2_new,X2_temp) - - } - } - X2 <- X2_new - - - #get only the RF strains - X2_RF <- X - X2_RF <- X2_RF[X2_RF$OrfRep == "YDL227C",] - #if set to max theoretical value, add a 1 to SM, if not, leave as 0 - #SM = Set to Max - X2_RF$SM <- 0 - #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min - for(i in 1:length(unique(X2_RF$Conc_Num))){ - Concentration <- unique(X2_RF$Conc_Num)[i] - X2_RF_temp <- X2_RF[X2_RF$Conc_Num == Concentration,] - if(Concentration == 0){ - X2_RF_new <- X2_RF_temp - print(paste("Check loop order, conc =",Concentration,sep=" ")) - } - if(Concentration > 0){ - try(X2_RF_temp[X2_RF_temp$l == 0 & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$SM <- 1) - try(X2_RF_temp[X2_RF_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - print(paste("Check loop order, if error, refs have no L values outside theoretical max L, for REFs, conc =",Concentration,sep=" ")) - - X2_RF_new <- rbind(X2_RF_new,X2_RF_temp) - - } - } - X2_RF <- X2_RF_new - - - #######Part 4 Get the RF Z score values - #Change the OrfRep Column to include the RF strain, the Gene name and the Num. so each RF gets its own score - X2_RF$OrfRep <- paste(X2_RF$OrfRep,X2_RF$Gene,X2_RF$Num.,sep="_") - - num_genes_RF <- length(unique(X2_RF$OrfRep)) - print(num_genes_RF) - - - #create the output data.frame containing columns for each RF strain - InteractionScores_RF <- unique(X2_RF["OrfRep"]) - #InteractionScores_RF$Gene <- unique(X2$Gene) - InteractionScores_RF$Gene <- NA - InteractionScores_RF$Raw_Shift_L <- NA - InteractionScores_RF$Z_Shift_L <- NA - InteractionScores_RF$lm_Score_L <- NA - InteractionScores_RF$Z_lm_L <- NA - InteractionScores_RF$R_Squared_L <- NA - InteractionScores_RF$Sum_Z_Score_L <- NA - InteractionScores_RF$Avg_Zscore_L <- NA - InteractionScores_RF$Raw_Shift_K <- NA - InteractionScores_RF$Z_Shift_K <- NA - InteractionScores_RF$lm_Score_K <- NA - InteractionScores_RF$Z_lm_K <- NA - InteractionScores_RF$R_Squared_K <- NA - InteractionScores_RF$Sum_Z_Score_K <- NA - InteractionScores_RF$Avg_Zscore_K <- NA - InteractionScores_RF$Raw_Shift_r <- NA - InteractionScores_RF$Z_Shift_r <- NA - InteractionScores_RF$lm_Score_r <- NA - InteractionScores_RF$Z_lm_r <- NA - InteractionScores_RF$R_Squared_r <- NA - InteractionScores_RF$Sum_Z_Score_r <- NA - InteractionScores_RF$Avg_Zscore_r <- NA - InteractionScores_RF$Raw_Shift_AUC <- NA - InteractionScores_RF$Z_Shift_AUC <- NA - InteractionScores_RF$lm_Score_AUC <- NA - InteractionScores_RF$Z_lm_AUC <- NA - InteractionScores_RF$R_Squared_AUC <- NA - InteractionScores_RF$Sum_Z_Score_AUC <- NA - InteractionScores_RF$Avg_Zscore_AUC <- NA - InteractionScores_RF$NG <- NA - InteractionScores_RF$SM <- NA - - - for(i in 1:num_genes_RF){ - #get each deletion strain ORF - Gene_Sel <- unique(X2_RF$OrfRep)[i] - #extract only the current deletion strain and its data - X_Gene_Sel <- X2_RF[X2_RF$OrfRep == Gene_Sel,] - - X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm = TRUE), - median_L = median(l,na.rm = TRUE), - sd_L = sd(l,na.rm = TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm = TRUE), - median_K = median(K,na.rm = TRUE), - sd_K = sd(K,na.rm = TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm = TRUE), - median_r = median(r,na.rm = TRUE), - sd_r = sd(r,na.rm = TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm = TRUE), - median_AUC = median(AUC,na.rm = TRUE), - sd_AUC = sd(AUC,na.rm = TRUE), - se_AUC = sd_AUC / sqrt(N-1), - NG = sum(NG,na.rm=TRUE), - DB= sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - #Get shift vals - #if L is 0, that means the no growth on no drug - #if L is NA at 0, that means the spot was removed due to contamination - #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift - #otherwise calculate shift at no drug conc - if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){ - X_stats_interaction$Raw_Shift_L <- 0 - X_stats_interaction$Raw_Shift_K <- 0 - X_stats_interaction$Raw_Shift_r <- 0 - X_stats_interaction$Raw_Shift_AUC <- 0 - X_stats_interaction$Z_Shift_L <- 0 - X_stats_interaction$Z_Shift_K <- 0 - X_stats_interaction$Z_Shift_r <- 0 - X_stats_interaction$Z_Shift_AUC <- 0 - }else{ - X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L - X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K - X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r - X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC - X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1] - X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1] - X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1] - X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1] - } - - - #get WT vals - X_stats_interaction$WT_l <- X_stats_BY_L$mean - X_stats_interaction$WT_K <- X_stats_BY_K$mean - X_stats_interaction$WT_r <- X_stats_BY_r$mean - X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean - - #Get WT SD - X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd - X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd - X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd - X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd - - - #only get scores if there's growth at no drug - if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for no growth values in Z score calculation - if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #get linear model - gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction) - - #get interaction score calculated by linear model and R-squared value for the fit - gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] - r_squared_l <- summary(gene_lm_L)$r.squared - gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] - r_squared_K <- summary(gene_lm_K)$r.squared - gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] - r_squared_r <- summary(gene_lm_r)$r.squared - gene_interaction_AUC <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_AUC$coefficients[1] - r_squared_AUC <- summary(gene_lm_AUC)$r.squared - - #Get total of non removed values - Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1 - - #report the scores - InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] - InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] - - InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L - InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc) - - - InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K - InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc) - - InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r - InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1) - - InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC - InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1) - } - - if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for missing values in Z score calculatiom - if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #NA values for the next part since there's an NA or 0 at the no drug. - gene_lm_L <- NA - gene_lm_K <- NA - gene_lm_r <- NA - gene_lm_AUC <- NA - - gene_interaction_L <- NA - r_squared_l <- NA - gene_interaction_K <- NA - r_squared_K <- NA - gene_interaction_r <- NA - r_squared_r <- NA - gene_interaction_AUC <- NA - r_squared_AUC <- NA - - X_stats_interaction$Raw_Shift_L <- NA - X_stats_interaction$Raw_Shift_K <- NA - X_stats_interaction$Raw_Shift_r <- NA - X_stats_interaction$Raw_Shift_AUC <- NA - - X_stats_interaction$Z_Shift_L <- NA - X_stats_interaction$Z_Shift_K <- NA - X_stats_interaction$Z_Shift_r <- NA - X_stats_interaction$Z_Shift_AUC <- NA - - InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] - InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] - - InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L - InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - - InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K - InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - - InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r - InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - - InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC - InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - } - - if(i == 1){ - X_stats_interaction_ALL_RF <- X_stats_interaction - } - if(i > 1){ - X_stats_interaction_ALL_RF <- rbind(X_stats_interaction_ALL_RF,X_stats_interaction) - } - - InteractionScores_RF$NG[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE) - InteractionScores_RF$DB[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE) - InteractionScores_RF$SM[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE) - - #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int) - - - } - print("Pass RF Calculation loop") - - lm_sd_L <- sd(InteractionScores_RF$lm_Score_L,na.rm=TRUE) - lm_sd_K <- sd(InteractionScores_RF$lm_Score_K,na.rm=TRUE) - lm_sd_r <- sd(InteractionScores_RF$lm_Score_r,na.rm=TRUE) - lm_sd_AUC <- sd(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) - - lm_mean_L <- mean(InteractionScores_RF$lm_Score_L,na.rm=TRUE) - lm_mean_K <- mean(InteractionScores_RF$lm_Score_K,na.rm=TRUE) - lm_mean_r <- mean(InteractionScores_RF$lm_Score_r,na.rm=TRUE) - lm_mean_AUC <- mean(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) - - print(paste("Mean RF linear regression score L",lm_mean_L)) - - - InteractionScores_RF$Z_lm_L <- (InteractionScores_RF$lm_Score_L - lm_mean_L)/(lm_sd_L) - InteractionScores_RF$Z_lm_K <- (InteractionScores_RF$lm_Score_K - lm_mean_K)/(lm_sd_K) - InteractionScores_RF$Z_lm_r <- (InteractionScores_RF$lm_Score_r - lm_mean_r)/(lm_sd_r) - InteractionScores_RF$Z_lm_AUC <- (InteractionScores_RF$lm_Score_AUC - lm_mean_AUC)/(lm_sd_AUC) - - - InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$Z_lm_L,decreasing=TRUE),] - InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$NG,decreasing=TRUE),] - write.csv(InteractionScores_RF,paste(outputpath,"RF_ZScores_Interaction.csv",sep=""),row.names=FALSE) - - - for(i in 1:num_genes_RF){ - Gene_Sel <- unique(InteractionScores_RF$OrfRep)[i] - X_ZCalculations <- X_stats_interaction_ALL_RF[X_stats_interaction_ALL_RF$OrfRep == Gene_Sel,] - X_Int_Scores <- InteractionScores_RF[InteractionScores_RF$OrfRep == Gene_Sel,] - - p_rf_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg Zscore =",round(X_Int_Scores$Avg_Zscore_L,2))) + - annotate("text",x=1,y=25,label = paste("lm Zscore =",round(X_Int_Scores$Z_lm_L,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_rf_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) + - annotate("text",x=1,y=25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_L,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_rf_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-0.65,0.65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) + - annotate("text",x=1,y=0.25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_r,2))) + - annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) + - theme_Publication() - - p_rf_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-6500,6500)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) + - annotate("text",x=1,y=2500,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_AUC,2))) + - annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) + - theme_Publication() - - - - if(i == 1){ - X_stats_interaction_ALL_RF_final <- X_ZCalculations - } - if(i > 1){ - X_stats_interaction_ALL_RF_final <- rbind(X_stats_interaction_ALL_RF_final,X_ZCalculations) - } - } - print("Pass RF ggplot loop") - write.csv(X_stats_interaction_ALL_RF_final,paste(outputpath,"RF_ZScore_Calculations.csv",sep=""),row.names = FALSE) - - - ####### Part 5 - Get Zscores for Gene deletion strains - - - - #get total number of genes for the next loop - num_genes <- length(unique(X2$OrfRep)) - print(num_genes) - - #create the output data.frame containing columns for each deletion strain - InteractionScores <- unique(X2["OrfRep"]) - #InteractionScores$Gene <- unique(X2$Gene) - InteractionScores$Gene <- NA - InteractionScores$Raw_Shift_L <- NA - InteractionScores$Z_Shift_L <- NA - InteractionScores$lm_Score_L <- NA - InteractionScores$Z_lm_L <- NA - InteractionScores$R_Squared_L <- NA - InteractionScores$Sum_Z_Score_L <- NA - InteractionScores$Avg_Zscore_L <- NA - InteractionScores$Raw_Shift_K <- NA - InteractionScores$Z_Shift_K <- NA - InteractionScores$lm_Score_K <- NA - InteractionScores$Z_lm_K <- NA - InteractionScores$R_Squared_K <- NA - InteractionScores$Sum_Z_Score_K <- NA - InteractionScores$Avg_Zscore_K <- NA - InteractionScores$Raw_Shift_r <- NA - InteractionScores$Z_Shift_r <- NA - InteractionScores$lm_Score_r <- NA - InteractionScores$Z_lm_r <- NA - InteractionScores$R_Squared_r <- NA - InteractionScores$Sum_Z_Score_r <- NA - InteractionScores$Avg_Zscore_r <- NA - InteractionScores$Raw_Shift_AUC <- NA - InteractionScores$Z_Shift_AUC <- NA - InteractionScores$lm_Score_AUC <- NA - InteractionScores$Z_lm_AUC <- NA - InteractionScores$R_Squared_AUC <- NA - InteractionScores$Sum_Z_Score_AUC <- NA - InteractionScores$Avg_Zscore_AUC <- NA - InteractionScores$NG <- NA - InteractionScores$DB <- NA - InteractionScores$SM <- NA - - for(i in 1:num_genes){ - #get each deletion strain ORF - Gene_Sel <- unique(X2$OrfRep)[i] - #extract only the current deletion strain and its data - X_Gene_Sel <- X2[X2$OrfRep == Gene_Sel,] - - X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm = TRUE), - median_L = median(l,na.rm = TRUE), - sd_L = sd(l,na.rm = TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm = TRUE), - median_K = median(K,na.rm = TRUE), - sd_K = sd(K,na.rm = TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm = TRUE), - median_r = median(r,na.rm = TRUE), - sd_r = sd(r,na.rm = TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm = TRUE), - median_AUC = median(AUC,na.rm = TRUE), - sd_AUC = sd(AUC,na.rm = TRUE), - se_AUC = sd_AUC / sqrt(N-1), - NG = sum(NG,na.rm=TRUE), - DB= sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - #Get shift vals - #if L is 0, that means the no growth on no drug - #if L is NA at 0, that means the spot was removed due to contamination - #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift - #otherwise calculate shift at no drug conc - if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){ - X_stats_interaction$Raw_Shift_L <- 0 - X_stats_interaction$Raw_Shift_K <- 0 - X_stats_interaction$Raw_Shift_r <- 0 - X_stats_interaction$Raw_Shift_AUC <- 0 - X_stats_interaction$Z_Shift_L <- 0 - X_stats_interaction$Z_Shift_K <- 0 - X_stats_interaction$Z_Shift_r <- 0 - X_stats_interaction$Z_Shift_AUC <- 0 - }else{ - X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L - X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K - X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r - X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC - X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1] - X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1] - X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1] - X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1] - } - - - #get WT vals - X_stats_interaction$WT_l <- X_stats_BY_L$mean - X_stats_interaction$WT_K <- X_stats_BY_K$mean - X_stats_interaction$WT_r <- X_stats_BY_r$mean - X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean - - #Get WT SD - X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd - X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd - X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd - X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd - - - #only get scores if there's growth at no drug - if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for no growth values in Z score calculation - if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #get linear model - gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction) - - #get interaction score calculated by linear model and R-squared value for the fit - gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] - r_squared_l <- summary(gene_lm_L)$r.squared - gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] - r_squared_K <- summary(gene_lm_K)$r.squared - gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] - r_squared_r <- summary(gene_lm_r)$r.squared - gene_interaction_AUC <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_AUC$coefficients[1] - r_squared_AUC <- summary(gene_lm_AUC)$r.squared - - #Get total of non removed values - Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1 - - #report the scores - InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1]) - InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1]) - - InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_L - InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_L-lm_mean_L)/lm_sd_L - InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE) - InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc) - - - InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_K - InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_K-lm_mean_K)/lm_sd_K - InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE) - InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc) - - InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_r - InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_r-lm_mean_r)/lm_sd_r - InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE) - InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1) - - InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_AUC - InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_AUC-lm_mean_AUC)/lm_sd_AUC - InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE) - InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1) - } - - if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for missing values in Z score calculatiom - if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #NA values for the next part since there's an NA or 0 at the no drug. - gene_lm_L <- NA - gene_lm_K <- NA - gene_lm_r <- NA - - gene_interaction_L <- NA - r_squared_l <- NA - gene_interaction_K <- NA - r_squared_K <- NA - gene_interaction_r <- NA - r_squared_r <- NA - - X_stats_interaction$Raw_Shift_L <- NA - X_stats_interaction$Raw_Shift_K <- NA - X_stats_interaction$Raw_Shift_r <- NA - X_stats_interaction$Raw_Shift_AUC <- NA - - X_stats_interaction$Z_Shift_L <- NA - X_stats_interaction$Z_Shift_K <- NA - X_stats_interaction$Z_Shift_r <- NA - X_stats_interaction$Z_Shift_AUC <- NA - - InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1]) - InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1]) - - InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- NA - - InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <-NA - InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- NA - - InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- NA - - InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - } - - if(i == 1){ - X_stats_interaction_ALL <- X_stats_interaction - } - if(i > 1){ - X_stats_interaction_ALL <- rbind(X_stats_interaction_ALL,X_stats_interaction) - } - - InteractionScores$NG[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE) - InteractionScores$DB[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE) - InteractionScores$SM[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE) - - #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int) - - - } - print("Pass Int Calculation loop") - InteractionScores <- InteractionScores[order(InteractionScores$Z_lm_L,decreasing=TRUE),] - InteractionScores <- InteractionScores[order(InteractionScores$NG,decreasing=TRUE),] - df_order_by_OrfRep <- unique(InteractionScores$OrfRep) - #X_stats_interaction_ALL <- X_stats_interaction_ALL[order(X_stats_interaction_ALL$NG,decreasing=TRUE),] - write.csv(InteractionScores,paste(outputpath,"ZScores_Interaction.csv",sep=""),row.names=FALSE) - - InteractionScores_deletion_enhancers_L <- InteractionScores[InteractionScores$Avg_Zscore_L >= 2,] - InteractionScores_deletion_enhancers_K <- InteractionScores[InteractionScores$Avg_Zscore_K <= -2,] - InteractionScores_deletion_suppressors_L <- InteractionScores[InteractionScores$Avg_Zscore_L <= -2,] - InteractionScores_deletion_suppressors_K <- InteractionScores[InteractionScores$Avg_Zscore_K >= 2,] - InteractionScores_deletion_enhancers_and_Suppressors_L <- InteractionScores[InteractionScores$Avg_Zscore_L >= 2 | InteractionScores$Avg_Zscore_L <= -2,] - InteractionScores_deletion_enhancers_and_Suppressors_K <- InteractionScores[InteractionScores$Avg_Zscore_K >= 2 | InteractionScores$Avg_Zscore_K <= -2,] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- InteractionScores[InteractionScores$Z_lm_L >= 2 & InteractionScores$Avg_Zscore_L <= -2,] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- InteractionScores[InteractionScores$Z_lm_L <= -2 & InteractionScores$Avg_Zscore_L >= 2,] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- InteractionScores[InteractionScores$Z_lm_K <= -2 & InteractionScores$Avg_Zscore_K >= 2,] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- InteractionScores[InteractionScores$Z_lm_K >= 2 & InteractionScores$Avg_Zscore_K <= -2,] - - InteractionScores_deletion_enhancers_L <- InteractionScores_deletion_enhancers_L[!is.na(InteractionScores_deletion_enhancers_L$OrfRep),] - InteractionScores_deletion_enhancers_K <- InteractionScores_deletion_enhancers_K[!is.na(InteractionScores_deletion_enhancers_K$OrfRep),] - InteractionScores_deletion_suppressors_L <- InteractionScores_deletion_suppressors_L[!is.na(InteractionScores_deletion_suppressors_L$OrfRep),] - InteractionScores_deletion_suppressors_K <- InteractionScores_deletion_suppressors_K[!is.na(InteractionScores_deletion_suppressors_K$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_L <- InteractionScores_deletion_enhancers_and_Suppressors_L[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_K <- InteractionScores_deletion_enhancers_and_Suppressors_K[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K$OrfRep),] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L[!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L$OrfRep),] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L[!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L$OrfRep),] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K[!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K$OrfRep),] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K[!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K$OrfRep),] - - write.csv(InteractionScores_deletion_enhancers_L,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_K,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_L,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_K,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L,paste(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L,paste(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K,paste(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K,paste(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv",sep=""),row.names=FALSE) - - #get enhancers and suppressors for linear regression - InteractionScores_deletion_enhancers_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2,] - InteractionScores_deletion_enhancers_K_lm <- InteractionScores[InteractionScores$Z_lm_K <= -2,] - InteractionScores_deletion_suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L <= -2,] - InteractionScores_deletion_suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2,] - InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2 | InteractionScores$Z_lm_L <= -2,] - InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2 | InteractionScores$Z_lm_K <= -2,] - - InteractionScores_deletion_enhancers_L_lm <- InteractionScores_deletion_enhancers_L_lm[!is.na(InteractionScores_deletion_enhancers_L_lm$OrfRep),] - InteractionScores_deletion_enhancers_K_lm <- InteractionScores_deletion_enhancers_K_lm[!is.na(InteractionScores_deletion_enhancers_K_lm$OrfRep),] - InteractionScores_deletion_suppressors_L_lm <- InteractionScores_deletion_suppressors_L_lm[!is.na(InteractionScores_deletion_suppressors_L_lm$OrfRep),] - InteractionScores_deletion_suppressors_K_lm <- InteractionScores_deletion_suppressors_K_lm[!is.na(InteractionScores_deletion_suppressors_K_lm$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores_deletion_enhancers_and_Suppressors_L_lm[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L_lm$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores_deletion_enhancers_and_Suppressors_K_lm[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K_lm$OrfRep),] - - write.csv(InteractionScores_deletion_enhancers_L_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_L_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_K_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_K_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_L_lm,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_L_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_K_lm,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_K_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv",sep=""),row.names=FALSE) - - - write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) - print('ln 1570 write StudyInfo.csv after ') - #write.table(Labels,file=paste("../Code/StudyInfo.txt"),sep="\t",row.names = FALSE) - - for(i in 1:num_genes){ - Gene_Sel <- unique(InteractionScores$OrfRep)[i] - X_ZCalculations <- X_stats_interaction_ALL[X_stats_interaction_ALL$OrfRep == Gene_Sel,] - X_Int_Scores <- InteractionScores[InteractionScores$OrfRep == Gene_Sel,] - - p_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_L,2))) + - annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_L,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) + - annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_K,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-0.65,0.65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) + - annotate("text",x=1,y=0.25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_r,2))) + - annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) + - theme_Publication() - - p_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-6500,6500)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) + - annotate("text",x=1,y=2500,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_AUC,2))) + - annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) + - theme_Publication() - - - - if(i == 1){ - X_stats_interaction_ALL_final <- X_ZCalculations - } - if(i > 1){ - X_stats_interaction_ALL_final <- rbind(X_stats_interaction_ALL_final,X_ZCalculations) - } - } - print("Pass Int ggplot loop") - write.csv(X_stats_interaction_ALL_final,paste(outputpath,"ZScore_Calculations.csv",sep=""),row.names = FALSE) - - - - - - - Blank <- ggplot(X2_RF) + geom_blank() - - pdf(paste(outputpath,"InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE) - - X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise, - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_AUC = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - NG = sum(NG,na.rm=TRUE), - DB = sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - L_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,l)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + - annotate("text",x=-0.25,y=10,label="NG") + - annotate("text",x=-0.25,y=5,label="DB") + - annotate("text",x=-0.25,y=0,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=0,label=X_stats_X2_RF$SM) + - theme_Publication() - - K_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,K)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(-20,160)) + - annotate("text",x=-0.25,y=-5,label="NG") + - annotate("text",x=-0.25,y=-12.5,label="DB") + - annotate("text",x=-0.25,y=-20,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-5,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-12.5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-20,label=X_stats_X2_RF$SM) + - theme_Publication() - - R_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,r)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - annotate("text",x=-0.25,y=.9,label="NG") + - annotate("text",x=-0.25,y=.8,label="DB") + - annotate("text",x=-0.25,y=.7,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.9,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.8,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.7,label=X_stats_X2_RF$SM) + - theme_Publication() - - AUC_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,AUC)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + - annotate("text",x=-0.25,y=11000,label="NG") + - annotate("text",x=-0.25,y=10000,label="DB") + - annotate("text",x=-0.25,y=9000,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=11000,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10000,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=9000,label=X_stats_X2_RF$SM) + - theme_Publication() - - - L_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + - theme_Publication() - - K_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + - theme_Publication() - - r_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - theme_Publication() - - AUC_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + - theme_Publication() - - - grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2) - grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2) - - - - #plot the references - #grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4) - #grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2) - - #loop for grid arrange 4x3 - j <- rep(1,((num_genes)/3)-1) - for(n in 1:length(j)){ - j[n+1] <- n*3 + 1 - } - #loop for printing each plot - num <- 0 - for(m in 1:(round((num_genes)/3)-1)){ - num <- j[m] - grid.arrange(p_l[[num]],p_K[[num]],p_r[[num]],p_AUC[[num]],p_l[[num+1]],p_K[[num+1]],p_r[[num+1]],p_AUC[[num+1]],p_l[[num+2]],p_K[[num+2]],p_r[[num+2]],p_AUC[[num+2]],ncol=4,nrow=3) - #grid.arrange(p_l[[364]],p_K[[364]],p_r[[364]],p_l[[365]],p_K[[365]],p_r[[365]],p_l[[366]],p_K[[366]],p_r[[366]],ncol=3,nrow=3) - - - #p1[[num+3]],p_K[[num+3]],p_r[[num+3]],p1[[num+4]],p_K[[num+4]],p_r[[num+4]] - } - if(num_genes != (num+2)){ - total_num = num_genes - (num+2) - if(total_num == 5){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_AUC[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],p_AUC[[num+7]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - if(total_num == 4){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_AUC[[num+6]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - - if(total_num == 3){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3) - #grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 2){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 1){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - } - dev.off() - - - - pdf(paste(outputpath,"RF_InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE) - - X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise, - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_AUC = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - NG = sum(NG,na.rm=TRUE), - DB = sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - L_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,l)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + - annotate("text",x=-0.25,y=10,label="NG") + - annotate("text",x=-0.25,y=5,label="DB") + - annotate("text",x=-0.25,y=0,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=0,label=X_stats_X2_RF$SM) + - theme_Publication() - - K_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,K)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(-20,160)) + - annotate("text",x=-0.25,y=-5,label="NG") + - annotate("text",x=-0.25,y=-12.5,label="DB") + - annotate("text",x=-0.25,y=-20,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-5,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-12.5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-20,label=X_stats_X2_RF$SM) + - theme_Publication() - - R_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,r)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - annotate("text",x=-0.25,y=.9,label="NG") + - annotate("text",x=-0.25,y=.8,label="DB") + - annotate("text",x=-0.25,y=.7,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.9,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.8,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.7,label=X_stats_X2_RF$SM) + - theme_Publication() - - AUC_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,AUC)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + - annotate("text",x=-0.25,y=11000,label="NG") + - annotate("text",x=-0.25,y=10000,label="DB") + - annotate("text",x=-0.25,y=9000,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=11000,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10000,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=9000,label=X_stats_X2_RF$SM) + - theme_Publication() - - - L_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + - theme_Publication() - - K_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + - theme_Publication() - - r_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - theme_Publication() - - AUC_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(12000,0)) + - theme_Publication() - - - grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2) - grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2) - - - - #plot the references - #grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4) - #grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2) - - #loop for grid arrange 4x3 - j <- rep(1,((num_genes_RF)/3)-1) - for(n in 1:length(j)){ - j[n+1] <- n*3 + 1 - } - #loop for printing each plot - num <- 0 - for(m in 1:(round((num_genes_RF)/3)-1)){ - num <- j[m] - grid.arrange(p_rf_l[[num]],p_rf_K[[num]],p_rf_r[[num]],p_rf_AUC[[num]],p_rf_l[[num+1]],p_rf_K[[num+1]],p_rf_r[[num+1]],p_rf_AUC[[num+1]],p_rf_l[[num+2]],p_rf_K[[num+2]],p_rf_r[[num+2]],p_rf_AUC[[num+2]],ncol=4,nrow=3) - #grid.arrange(p_rf_l[[364]],p_rf_K[[364]],p_rf_r[[364]],p_rf_l[[365]],p_rf_K[[365]],p_rf_r[[365]],p_rf_l[[366]],p_rf_K[[366]],p_rf_r[[366]],ncol=3,nrow=3) - - - #p1[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p1[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]] - } - if(num_genes_RF != (num+2)){ - total_num = num_genes_RF - (num+2) - if(total_num == 5){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_AUC[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],p_rf_AUC[[num+7]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - if(total_num == 4){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_AUC[[num+6]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - - if(total_num == 3){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3) - #grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 2){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 1){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - } - dev.off() - - #print rank plots for L and K gene interactions - - - InteractionScores_AdjustMissing <- InteractionScores - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_L),]$Avg_Zscore_L <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_K),]$Avg_Zscore_K <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_r),]$Avg_Zscore_r <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_AUC),]$Avg_Zscore_AUC <- 0.001 - - InteractionScores_AdjustMissing$L_Rank <- NA - InteractionScores_AdjustMissing$K_Rank <- NA - InteractionScores_AdjustMissing$r_Rank <- NA - InteractionScores_AdjustMissing$AUC_Rank <- NA - - InteractionScores_AdjustMissing$L_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_L) - InteractionScores_AdjustMissing$K_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_K) - InteractionScores_AdjustMissing$r_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_r) - InteractionScores_AdjustMissing$AUC_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_AUC) - - # - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_L),]$Z_lm_L <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_K),]$Z_lm_K <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_r),]$Z_lm_r <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_AUC),]$Z_lm_AUC <- 0.001 - - InteractionScores_AdjustMissing$L_Rank_lm <- NA - InteractionScores_AdjustMissing$K_Rank_lm <- NA - InteractionScores_AdjustMissing$r_Rank_lm <- NA - InteractionScores_AdjustMissing$AUC_Rank_lm <- NA - - InteractionScores_AdjustMissing$L_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_L) - InteractionScores_AdjustMissing$K_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_K) - InteractionScores_AdjustMissing$r_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_r) - InteractionScores_AdjustMissing$AUC_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_AUC) - - - - Rank_L_1SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - pdf(paste(outputpath,"RankPlots.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2) - - dev.off() - - - Rank_L_1SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - pdf(paste(outputpath,"RankPlots_lm.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2) - - dev.off() - - - - X_NArm <- InteractionScores[!is.na(InteractionScores$Z_lm_L) | !is.na(InteractionScores$Avg_Zscore_L) ,] - - #find overlaps - X_NArm$Overlap <- "No Effect" - try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Enhancer Both") - try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Suppressor Both") - try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= 2,]$Overlap <- "Deletion Enhancer lm only") - try(X_NArm[X_NArm$Z_lm_L <= 2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Enhancer Avg Zscore only") - try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= -2,]$Overlap <- "Deletion Suppressor lm only") - try(X_NArm[X_NArm$Z_lm_L >= -2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Suppressor Avg Zscore only") - try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Enhancer lm, Deletion Suppressor Avg Z score") - try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Suppressor lm, Deletion Enhancer Avg Z score") - - #get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 - get_lm_L <- lm(X_NArm$Z_lm_L~X_NArm$Avg_Zscore_L) - L_lm <- summary(get_lm_L) - - get_lm_K <- lm(X_NArm$Z_lm_K~X_NArm$Avg_Zscore_K) - K_lm <- summary(get_lm_K) - - get_lm_r <- lm(X_NArm$Z_lm_r~X_NArm$Avg_Zscore_r) - r_lm <- summary(get_lm_r) - - get_lm_AUC <- lm(X_NArm$Z_lm_AUC~X_NArm$Avg_Zscore_AUC) - AUC_lm <- summary(get_lm_AUC) - - pdf(paste(outputpath,"Avg_Zscore_vs_lm_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) - - print(ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm L") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(Avg_Zscore_K,Z_lm_K)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm K") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(K_lm$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(Avg_Zscore_r,Z_lm_r)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm r") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(r_lm$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(Avg_Zscore_AUC,Z_lm_AUC)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm AUC") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(AUC_lm$r.squared,2))) + theme_Publication_legend_right()) - - dev.off() - - - lm_v_Zscore_L <- ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L,ORF=OrfRep,Gene=Gene,NG=NG,SM=SM,DB=DB)) + geom_point(aes(color=Overlap),shape=3) + - geom_smooth(method = "lm",color=1) + ggtitle("Avg Zscore vs lm L") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right() - - pgg <- ggplotly(lm_v_Zscore_L) - #pgg - plotly_path <- paste(getwd(),"/",outputpath,"Avg_Zscore_vs_lm_NA_rm.html",sep="") - saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - X_NArm$L_Rank <- rank(X_NArm$Avg_Zscore_L) - X_NArm$K_Rank <- rank(X_NArm$Avg_Zscore_K) - X_NArm$r_Rank <- rank(X_NArm$Avg_Zscore_r) - X_NArm$AUC_Rank <- rank(X_NArm$Avg_Zscore_AUC) - - X_NArm$L_Rank_lm <- rank(X_NArm$Z_lm_L) - X_NArm$K_Rank_lm <- rank(X_NArm$Z_lm_K) - X_NArm$r_Rank_lm <- rank(X_NArm$Z_lm_r) - X_NArm$AUC_Rank_lm <- rank(X_NArm$Z_lm_AUC) - - #get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 - get_lm_L2 <- lm(X_NArm$L_Rank_lm~X_NArm$L_Rank) - L_lm2 <- summary(get_lm_L2) - - get_lm_K2 <- lm(X_NArm$K_Rank_lm~X_NArm$K_Rank) - K_lm2 <- summary(get_lm_K2) - - get_lm_r2 <- lm(X_NArm$r_Rank_lm~X_NArm$r_Rank) - r_lm2 <- summary(get_lm_r2) - - get_lm_AUC2 <- lm(X_NArm$AUC_Rank_lm~X_NArm$AUC_Rank) - AUC_lm2 <- summary(get_lm_AUC2) - - num_genes_NArm2 <- (dim(X_NArm)[1])/2 - - pdf(paste(outputpath,"Avg_Zscore_vs_lm_ranked_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) - - print(ggplot(X_NArm,aes(L_Rank,L_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank Avg Zscore vs lm L") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(L_lm2$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(K_Rank,K_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank Avg Zscore vs lm K") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(K_lm2$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(r_Rank,r_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank Avg Zscore vs lm r") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(r_lm2$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(AUC_Rank,AUC_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank of Avg Zscore vs lm AUC") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(AUC_lm2$r.squared,2))) + theme_Publication_legend_right()) - - - - dev.off() - - - - Rank_L_1SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - pdf(paste(outputpath,"RankPlots_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2) - - dev.off() - - - Rank_L_1SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - pdf(paste(outputpath,"RankPlots_lm_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2) - - dev.off() - -} - - - -#get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 -get_lm_1 <- lm(X_NArm$Z_lm_K~X_NArm$Z_lm_L) -L_lm_1 <- summary(get_lm_1) - -get_lm_2 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_L) -L_lm_2 <- summary(get_lm_2) - -get_lm_3 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_L) -L_lm_3 <- summary(get_lm_3) - -get_lm_4 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_K) -L_lm_4 <- summary(get_lm_4) - -get_lm_5 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_K) -L_lm_5 <- summary(get_lm_5) - -get_lm_6 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_r) -L_lm_6 <- summary(get_lm_6) - - -pdf(file=paste(outputpath,"Correlation_CPPs.pdf",sep=""),width = 10, height = 7, onefile = TRUE) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_K)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction L vs. Interaction K") + - xlab("z-score L") + ylab("z-score K") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_1$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction L vs. Interaction r") + - xlab("z-score L") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_2$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction L vs. Interaction AUC") + - xlab("z-score L") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_3$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction K vs. Interaction r") + - xlab("z-score K") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_4$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction K vs. Interaction AUC") + - xlab("z-score K") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_5$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_r,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction r vs. Interaction AUC") + - xlab("z-score r") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_6$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -InteractionScores_RF2 <- InteractionScores_RF[!is.na(InteractionScores_RF$Z_lm_L),] -ggplot(X_NArm,aes(Z_lm_L,Z_lm_K)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_K),color="cyan") + - ggtitle("Interaction L vs. Interaction K") + - xlab("z-score L") + ylab("z-score K") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_1$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_r),color="cyan") + - ggtitle("Interaction L vs. Interaction r") + - xlab("z-score L") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_2$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_AUC),color="cyan") + - ggtitle("Interaction L vs. Interaction AUC") + - xlab("z-score L") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_3$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_K,Z_lm_r),color="cyan") + - ggtitle("Interaction K vs. Interaction r") + - xlab("z-score K") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_4$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_K,Z_lm_AUC),color="cyan") + - ggtitle("Interaction K vs. Interaction AUC") + - xlab("z-score K") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_5$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_r,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_r,Z_lm_AUC),color="cyan") + - ggtitle("Interaction r vs. Interaction AUC") + - xlab("z-score r") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_6$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - - - - - -dev.off() - - -#write.csv(Labels,file=paste("../Code/Parameters.csv"),row.names = FALSE) -timestamp() - -#BoneYard*********************************************** -#I'm thinking this parameter needs to be save somewhere "permanent' for the record so outputs can be reproduced. -#take this out of the Arguments. In Matlab I could for future in .mat file. Maybe I could save the SD Args[2] as part of the StudyInfo.txt. -#Corruptable but better than nothing. -#if(is.na(Args[2])){ -# std=3 -#}else { -# std= Arg[2] -#Delta_Background_sdFactor <- 2 #Args[3] -#DelBGFactr <- as.numeric(Delta_Background_sdFactor) -#} - diff --git a/workflow/templates/qhtcp/Exp2/backups/InteractTemplateB4Prompt4SDinput.R b/workflow/templates/qhtcp/Exp2/backups/InteractTemplateB4Prompt4SDinput.R deleted file mode 100644 index 3b11777c..00000000 --- a/workflow/templates/qhtcp/Exp2/backups/InteractTemplateB4Prompt4SDinput.R +++ /dev/null @@ -1,2702 +0,0 @@ -#Based on InteractionTemplate.R which is based on Sean Santose's Interaction_V5 script. -#Adapt SS For Structured Data storage but using command line scripts -###Set up the required libraries, call required plot theme elements and set up the command line arguments -library("ggplot2") -library("plyr") -library("extrafont") -library("gridExtra") -library("gplots") -library("RColorBrewer") -library("stringr") -#library("gdata") -library(plotly) -library(htmlwidgets) - -Args <- commandArgs(TRUE) -input_file <- Args[1] #"!!Results_17_0827_yor1null-rpl12anull misLabeledAsFrom MI 17_0919_yor1-curated.txt" #Args[1] #Arg 1 #"!!ResultsStd_JS 19_1224_HLEG_P53.txt" is the !!results ... .txt - -#Path to Output Directory -W=getwd() #R is F'd up, Can't use, Any legitamate platfold could build out dirs from this -outDir <- "ZScores/" #"Args[2] #paste0(W,"/ZScores/") -subDir <- outDir #Args[2] - -if (file.exists(subDir)){ - outputpath <- subDir -} else { - dir.create(file.path(subDir)) -} - -if (file.exists(paste(subDir,"QC/",sep=""))){ - outputpath_QC <- paste(subDir,"QC/",sep="") -} else { - dir.create(file.path(paste(subDir,"QC/",sep=""))) - outputpath_QC <- paste(subDir,"QC/",sep="") -} -#define the output path (formerly the second argument from Rscript) -outputpath <- outDir - -#Set Args[2] the Background contamination noise filter as a function of standard deviation -#Sean recommends 3 or 5 SD factor. -#Capture Exp_ number,use it to Save Args[2]{std}to Labels field and then Write to Labels to studyInfo.txt for future reference -Labels <- read.csv(file= "../Code/StudyInfo.csv",stringsAsFactors = FALSE) #,sep= ",") -print("Be sure to include Argument 2 the Bacground noise filter standard deviation i.e., 3 or 5 per Sean") -std= as.numeric(Args[2]) -expNumber<- as.numeric(sub("^.*?(\\d+)$", "\\1", getwd())) -Labels[expNumber,3]= as.numeric(std) -Delta_Background_sdFactor <- std -DelBGFactr <- as.numeric(Delta_Background_sdFactor) -#Write Background SD value to studyInfo.txt file -#write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) -write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) -print('ln 50 write StudyInfo.csv ') -#write.table(Labels,file=paste(outputpath,"StudyInfo.txt"),sep = "\t",row.names = FALSE) - -#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -#++++++BEGIN USER DATA SELECTION SECTION+++++++++++++++++++++++++++++++++++++++++++++++++ - - -#read in the data -X <- read.delim(input_file,skip=2,as.is=T,row.names=1,strip.white=TRUE) -X <- X[!(X[[1]]%in%c("","Scan")),] -#X <- X[!(X[[1]]%in%c(61:76)),] #Remove dAmp plates which are Scans 61 thru 76 - -#X <- X[which(X$Specifics == "WT"),] - -X_length <- length(X[1,]) -X_end <- length(X[1,]) - 2 -X <- X[,c(1:46,X_end:X_length)] - - -#use numeric data to perform operations -X$Col <- as.numeric(X$Col) -X$Row <- as.numeric(X$Row) -X$l <- as.numeric(X$l) -X$K <- as.numeric(X$K) -X$r <- as.numeric(X$r) -X$Scan <- as.numeric(X$Scan) -X$AUC <- as.numeric(X$AUC) -X$LstBackgrd <- as.numeric(X$LstBackgrd) -X$X1stBackgrd <- as.numeric(X$X1stBackgrd) - -#set the OrfRep to YDL227C for the ref data -X[X$ORF == "YDL227C",]$OrfRep <- "YDL227C" -#Sean removes the Doxycyclin at 0.0ug.mL so that only the Oligomycin series with Doxycyclin of 0.12ug/mL are used. -#That is the first DM plates are removed from the data set with the following. -X <- X[X$Conc1 != "0ug/mL",] #This occurs only for Exp1 and Exp2 and so doesn't have any effect on Exp3&4 - - -#Mert placed the"bad_spot" text in the ORF col. for particular spots in the RF1 and RF2 plates. -#This code removes those spots from the data set used for the interaction analysis. Dr.Hartman feels that these donot effect Zscores significantly and so "non-currated" files were used. -#try(X <- X[X$ORF != "bad_spot",]) -#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++= - -#get total number of drug concentrations -Total_Conc_Nums <- length(unique(X$Conc)) - -#function to ID numbers in string with characters+numbers (ie to get numeric drug conc) -numextract <- function(string){ - str_extract(string, "\\-*\\d+\\.*\\d*") -} - -#generate a new column with the numeric drug concs -X$Conc_Num <- as.numeric(numextract(X$Conc)) -#Generate new column with the numeric drug concs as factors starting at 0 for the graphing later -X$Conc_Num_Factor <- as.numeric(as.factor(X$Conc_Num)) - 1 - -#Get the max factor for concentration -MAX_CONC <- max(X$Conc_Num_Factor) -#if treating numbers not as factors uncomment next line and comment out previous line -#MAX_CONC <- max(X$Conc_Num) - -#remove wells with problems for making graphs and to not include in summary statistics -X <- X[X$Gene != "BLANK",] -X <- X[X$Gene != "Blank",] -X <- X[X$ORF != "Blank",] -X <- X[X$Gene != "blank",] -#X <- X[X$Gene != "HO",] -Xbu= X -#Inserted to use SGDgenelist to update orfs and replace empty geneName cells with ORF name (adapted from Sean's Merge script). This is to 'fix' the naming for everything that follows (REMc, Heatmaps ... et.al) rather than do it piece meal later -#Sean's Match Script( which was adapted here) was fixed 2022_0608 so as not to write over the RF1&RF2 geneNames which caused a variance with his code results -#in the Z_lm_L,K,r&AUC output values. Values correlated well but were off by a multiplier factor. -SGDgeneList= "../Code/SGD_features.tab" -genes = data.frame(read.delim(file=SGDgeneList,quote="",header=FALSE,colClasses = c(rep("NULL",3), rep("character", 2), rep("NULL", 11)))) -for(i in 1:length(X[,14])){ - ii= as.numeric(i) - line_num = match(X[ii,14],genes[,1],nomatch=1) - OrfRepColNum= as.numeric(match('OrfRep',names(X))) - if(X[ii,OrfRepColNum]!= "YDL227C"){ - X[ii,15] = genes[line_num,2] - } - if((X[ii,15] == "")||(X[ii,15] == "OCT1")){ - X[ii,15] = X[ii,OrfRepColNum] - } -} -Xblankreplace= X -#X= Xbu #for restore testing restore X if geneName 'Match' routine needs changing - -#Remove dAmPs ******************************* -DAmPs_List <- "../Code/22_0602_Remy_DAmPsList.txt" -Damps <- read.delim(DAmPs_List,header=F) - -X <- X[!(X$ORF %in% Damps$V1),] #fix this to Damps[,1] -XafterDampsRM=X #Backup for debugging especially when Rstudio goes crazy out of control -# *********** - - -#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -#++++++END USER DATA SELECTION+++++++++++++++++++++++++++++++++++++++++++++++++ -#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -print("ln137 End of User Section including blank gene writeOver") -#++++Begin Graphics Boiler Plate Section+++++++++++++++++++++++++++++++++++++++ -#theme elements for plots -theme_Publication <- function(base_size=14, base_family="sans") { - library(grid) - library(ggthemes) - (theme_foundation(base_size=base_size, base_family=base_family) - + theme(plot.title = element_text(face = "bold", - size = rel(1.2), hjust = 0.5), - text = element_text(), - panel.background = element_rect(colour = NA), - plot.background = element_rect(colour = NA), - panel.border = element_rect(colour = NA), - axis.title = element_text(face = "bold",size = rel(1)), - axis.title.y = element_text(angle=90,vjust =2), - axis.title.x = element_text(vjust = -0.2), - axis.text = element_text(), - axis.line = element_line(colour="black"), - axis.ticks = element_line(), - panel.grid.major = element_line(colour="#f0f0f0"), - panel.grid.minor = element_blank(), - legend.key = element_rect(colour = NA), - legend.position = "bottom", - legend.direction = "horizontal", - legend.key.size= unit(0.2, "cm"), - legend.spacing = unit(0, "cm"), - legend.title = element_text(face="italic"), - plot.margin=unit(c(10,5,5,5),"mm"), - strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"), - strip.text = element_text(face="bold") - )) - -} - -scale_fill_Publication <- function(...){ - library(scales) - discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - -} - -scale_colour_Publication <- function(...){ - discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - -} - - -theme_Publication_legend_right <- function(base_size=14, base_family="sans") { - (theme_foundation(base_size=base_size, base_family=base_family) - + theme(plot.title = element_text(face = "bold", - size = rel(1.2), hjust = 0.5), - text = element_text(), - panel.background = element_rect(colour = NA), - plot.background = element_rect(colour = NA), - panel.border = element_rect(colour = NA), - axis.title = element_text(face = "bold",size = rel(1)), - axis.title.y = element_text(angle=90,vjust =2), - axis.title.x = element_text(vjust = -0.2), - axis.text = element_text(), - axis.line = element_line(colour="black"), - axis.ticks = element_line(), - panel.grid.major = element_line(colour="#f0f0f0"), - panel.grid.minor = element_blank(), - legend.key = element_rect(colour = NA), - legend.position = "right", - legend.direction = "vertical", - legend.key.size= unit(0.5, "cm"), - legend.spacing = unit(0, "cm"), - legend.title = element_text(face="italic"), - plot.margin=unit(c(10,5,5,5),"mm"), - strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"), - strip.text = element_text(face="bold") - )) - -} - -scale_fill_Publication <- function(...){ - discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - -} - -scale_colour_Publication <- function(...){ - discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - -} - - -#print timestamp for initial time the code starts -timestamp() -#+++++BEGIN QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++ -###Part 2 - Quality control -#print quality control graphs for each dataset before removing data due to contamination -#and before adjusting missing data to max theoretical values - -#plate analysis plot -#plate analysis is a quality check to identify plate effects containing anomalies - -Plate_Analysis_L <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication() - -Plate_Analysis_K <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication() - -Plate_Analysis_r <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_Publication() - -Plate_Analysis_AUC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication() - - - -Plate_Analysis_L_Box <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication() - -Plate_Analysis_K_Box <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication() - -Plate_Analysis_r_Box <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_Publication() - -Plate_Analysis_AUC_Box <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication() - -#quality control - values with a high delta background likely have heavy contamination -#check the frequency of these values -#report the L and K values of these spots -#report the number to be removed based on the Delta_Background_Tolerance -X$Delta_Backgrd <- X$LstBackgrd - X$X1stBackgrd - - -#raw l vs K before QC -Raw_l_vs_K_beforeQC <- ggplot(X,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + - ggtitle("Raw L vs K before QC") + - theme_Publication_legend_right() -pdf(paste(outputpath_QC,"Raw_L_vs_K_beforeQC.pdf",sep=""),width = 12,height = 8) -Raw_l_vs_K_beforeQC -dev.off() -pgg <- ggplotly(Raw_l_vs_K_beforeQC) -#pgg -plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_beforeQC.html",sep="") -saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - -#set delta background tolerance based on 3 sds from the mean delta background -Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(DelBGFactr*sd(X$Delta_Backgrd)) -#Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(3*sd(X$Delta_Backgrd)) -print(paste("Delta_Background_Tolerance is",Delta_Background_Tolerance,sep=" ")) - -Plate_Analysis_Delta_Backgrd <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2,position="jitter") + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_Box <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication() - - - -X_Delta_Backgrd_above_Tolerance <- X[X$Delta_Backgrd >= Delta_Background_Tolerance,] - -X_Delta_Backgrd_above_Tolerance_K_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$K,na.rm = TRUE))/2 -X_Delta_Backgrd_above_Tolerance_L_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$l,na.rm = TRUE))/2 -X_Delta_Backgrd_above_Tolerance_toRemove <- dim(X_Delta_Backgrd_above_Tolerance)[1] - -X_Delta_Backgrd_above_Tolerance_L_vs_K <- ggplot(X_Delta_Backgrd_above_Tolerance,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + - ggtitle(paste("Raw L vs K for strains above delta background threshold of",Delta_Background_Tolerance,"or above")) + - annotate("text",x=X_Delta_Backgrd_above_Tolerance_L_halfmedian,y=X_Delta_Backgrd_above_Tolerance_K_halfmedian, - label = paste("Strains above delta background tolerance = ",X_Delta_Backgrd_above_Tolerance_toRemove)) + - theme_Publication_legend_right() -pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.pdf",sep=""),width = 12,height = 8) -X_Delta_Backgrd_above_Tolerance_L_vs_K -dev.off() -pgg <- ggplotly(X_Delta_Backgrd_above_Tolerance_L_vs_K) -#pgg -plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.html",sep="") -saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - -#frequency plot for all data vs. the delta_background -DeltaBackground_Frequency_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_density() + - ggtitle("Density plot for Delta Background by Conc All Data") + theme_Publication_legend_right() - -#bar plot for all data vs. the delta_background -DeltaBackground_Bar_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_bar() + - ggtitle("Bar plot for Delta Background by Conc All Data") + theme_Publication_legend_right() - -pdf(file = paste(outputpath_QC,"Frequency_Delta_Background.pdf",sep=""),width = 12,height = 8) -print(DeltaBackground_Frequency_Plot) -print(DeltaBackground_Bar_Plot) -dev.off() - - -#Need to identify missing data, and differentiate between this data and removed data so the removed data can get set to NA and the missing data can get set to max theoretical values -#1 for missing data, 0 for non missing data -#Use "NG" for NoGrowth rather than "missing" -X$NG <- 0 -try(X[X$l == 0 & !is.na(X$l),]$NG <- 1) - -#1 for removed data, 0 non removed data -#Use DB to identify number of genes removed due to the DeltaBackground Threshold rather than "Removed" -X$DB <- 0 -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$DB <- 1) - -#replace the CPPs for l, r, AUC and K (must be last!) for removed data -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$l <- NA) -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$r <- NA) -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$AUC <- NA) -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$K <- NA) - - -Plate_Analysis_L_afterQC <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_afterQC <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_afterQC <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_afterQC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_afterQC <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - - -Plate_Analysis_L_Box_afterQC <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_Box_afterQC <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_Box_afterQC <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_Box_afterQC <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_Box_afterQC <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis.pdf",sep=""),width = 14,height=9) -Plate_Analysis_L -Plate_Analysis_L_afterQC -Plate_Analysis_K -Plate_Analysis_K_afterQC -Plate_Analysis_r -Plate_Analysis_r_afterQC -Plate_Analysis_AUC -Plate_Analysis_AUC_afterQC -Plate_Analysis_Delta_Backgrd -Plate_Analysis_Delta_Backgrd_afterQC -dev.off() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis_Boxplots.pdf",sep=""),width = 18,height=9) -Plate_Analysis_L_Box -Plate_Analysis_L_Box_afterQC -Plate_Analysis_K_Box -Plate_Analysis_K_Box_afterQC -Plate_Analysis_r_Box -Plate_Analysis_r_Box_afterQC -Plate_Analysis_AUC_Box -Plate_Analysis_AUC_Box_afterQC -Plate_Analysis_Delta_Backgrd_Box -Plate_Analysis_Delta_Backgrd_Box_afterQC -dev.off() - -#remove the zero values and print plate analysis -X_noZero <- X[which(X$l > 0),] -Plate_Analysis_L_afterQC_Z <- ggplot(X_noZero,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_afterQC_Z <- ggplot(X_noZero,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_afterQC_Z <- ggplot(X_noZero,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_afterQC_Z <- ggplot(X_noZero,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_afterQC_Z <- ggplot(X_noZero,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - - -Plate_Analysis_L_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros.pdf",sep=""),width = 14,height=9) -Plate_Analysis_L_afterQC_Z -Plate_Analysis_K_afterQC_Z -Plate_Analysis_r_afterQC_Z -Plate_Analysis_AUC_afterQC_Z -Plate_Analysis_Delta_Backgrd_afterQC_Z -dev.off() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros_Boxplots.pdf",sep=""),width = 18,height=9) -Plate_Analysis_L_Box_afterQC_Z -Plate_Analysis_K_Box_afterQC_Z -Plate_Analysis_r_Box_afterQC_Z -Plate_Analysis_AUC_Box_afterQC_Z -Plate_Analysis_Delta_Backgrd_Box_afterQC_Z -dev.off() - -#remove dataset with zeros removed -rm(X_noZero) - - -#X_test_missing_and_removed <- X[X$Removed == 1,] - -#calculate summary statistics for all strains, including both background and the deletions -X_stats_ALL <- ddply(X, c("Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_AUC = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - se_AUC = sd_AUC / sqrt(N-1) -) -#print(X_stats_ALL_L) - -write.csv(X_stats_ALL,file=paste(outputpath,"SummaryStats_ALLSTRAINS.csv"),row.names = FALSE) -#+++++END QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -##### Part 3 - Generate summary statistics and calculate the max theoretical L value -##### Calculate the Z score at each drug conc for each deletion strain - - -#get the background strains - can be modified to take another argument but for most screens will just be YDL227C -Background_Strains <- c("YDL227C") - -#first part of loop will go through for each background strain -#most cases there will only be one YDL227C -for(s in Background_Strains){ - X_Background <- X[X$OrfRep == s,] - - #if there's missing data for the background strains set these values to NA so the 0 values aren't included in summary statistics - #we may want to consider in some cases giving the max high value to L depending on the data type - if(table(X_Background$l)[1] == 0){ - X_Background[X_Background$l == 0,]$l <- NA - X_Background[X_Background$K == 0,]$K <- NA - X_Background[X_Background$r == 0,]$r <- NA - X_Background[X_Background$AUC == 0,]$AUC <- NA - } - - X_Background <- X_Background[!is.na(X_Background$l),] - - #get summary stats for L, K, R, AUC - X_stats_BY_L <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean = mean(l,na.rm=TRUE), - median = median(l,na.rm=TRUE), - max = max(l,na.rm=TRUE), - min = min(l,na.rm=TRUE), - sd = sd(l,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - print(X_stats_BY_L) - X1_SD <- max(X_stats_BY_L$sd) - - X_stats_BY_K <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(K)), - mean = mean(K,na.rm=TRUE), - median = median(K,na.rm=TRUE), - max = max(K,na.rm=TRUE), - min = min(K,na.rm=TRUE), - sd = sd(K,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - - X1_SD_K <- max(X_stats_BY_K$sd) - - - X_stats_BY_r <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = length(r), - mean = mean(r,na.rm=TRUE), - median = median(r,na.rm=TRUE), - max = max(r,na.rm=TRUE), - min = min(r,na.rm=TRUE), - sd = sd(r,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - - X1_SD_r <- max(X_stats_BY_r$sd) - - X_stats_BY_AUC <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = length(AUC), - mean = mean(AUC,na.rm=TRUE), - median = median(AUC,na.rm=TRUE), - max = max(AUC,na.rm=TRUE), - min = min(AUC,na.rm=TRUE), - sd = sd(AUC,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - - X1_SD_AUC <- max(X_stats_BY_AUC$sd) - - X_stats_BY <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_L = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - se_AUC = sd_AUC / sqrt(N-1) - ) - - write.csv(X_stats_BY,file=paste(outputpath,"SummaryStats_BackgroundStrains.csv"),row.names=FALSE) - - #calculate the max theoretical L values - #only look for max values when K is within 2SD of the ref strain - for(q in unique(X$Conc_Num_Factor)){ - if(q == 0){ - X_within_2SD_K <- X[X$Conc_Num_Factor == q,] - X_within_2SD_K <- X_within_2SD_K[!is.na(X_within_2SD_K$l),] - X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) ,] - X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - - X_outside_2SD_K <- X[X$Conc_Num_Factor == q,] - X_outside_2SD_K <- X_outside_2SD_K[!is.na(X_outside_2SD_K$l),] - #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - #X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - - } - if(q > 0){ - X_within_2SD_K_temp <- X[X$Conc_Num_Factor == q,] - X_within_2SD_K_temp <- X_within_2SD_K_temp[!is.na(X_within_2SD_K_temp$l),] - X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])),] - X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])),] - X_within_2SD_K <- rbind(X_within_2SD_K,X_within_2SD_K_temp) - - X_outside_2SD_K_temp <- X[X$Conc_Num_Factor == q,] - X_outside_2SD_K_temp <- X_outside_2SD_K_temp[!is.na(X_outside_2SD_K_temp$l),] - #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - #X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - X_outside_2SD_K <- rbind(X_outside_2SD_K,X_outside_2SD_K_temp) - } - } - - X_stats_BY_L_within_2SD_K <- ddply(X_within_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean = mean(l), - median = median(l), - max = max(l,na.rm=TRUE), - min = min(l,na.rm=TRUE), - sd = sd(l), - se = sd / sqrt(N-1), - z_max = (max-mean)/sd - ) - print(X_stats_BY_L_within_2SD_K) - X1_SD_within_2SD_K <- max(X_stats_BY_L_within_2SD_K$sd) - write.csv(X_stats_BY_L_within_2SD_K,file=paste(outputpath_QC,"Max_Observed_L_Vals_for_spots_within_2SD_K.csv",sep=""),row.names=FALSE) - - X_stats_BY_L_outside_2SD_K <- ddply(X_outside_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean = mean(l), - median = median(l), - max = max(l,na.rm=TRUE), - min = min(l,na.rm=TRUE), - sd = sd(l), - se = sd / sqrt(N-1) - ) - print(X_stats_BY_L_outside_2SD_K) - X1_SD_outside_2SD_K <- max(X_stats_BY_L_outside_2SD_K$sd) - - #X1_SD_outside_2SD_K <- X[X$l %in% X1_SD_within_2SD_K$l,] - Outside_2SD_K_L_vs_K <- ggplot(X_outside_2SD_K,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + - ggtitle("Raw L vs K for strains falling outside 2SD of the K mean at each conc") + theme_Publication_legend_right() - pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8) - print(Outside_2SD_K_L_vs_K) - dev.off() - pgg <- ggplotly(Outside_2SD_K_L_vs_K) - plotly_path <- paste(getwd(),"/",outputpath_QC,"RawL_vs_K_for_strains_outside_2SD_K.html",sep="") - saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - - Outside_2SD_K_delta_background_vs_K <- ggplot(X_outside_2SD_K,aes(Delta_Backgrd,K,color=as.factor(Conc_Num))) + geom_point(aes(l=l,ORF=ORF,Gene=Gene),shape=3,position="jitter") + - ggtitle("DeltaBackground vs K for strains falling outside 2SD of the K mean at each conc") + theme_Publication_legend_right() - pdf(paste(outputpath_QC,"DeltaBackground_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8) - Outside_2SD_K_delta_background_vs_K - dev.off() - pgg <- ggplotly(Outside_2SD_K_delta_background_vs_K) - #pgg - plotly_path <- paste(getwd(),"/",outputpath_QC,"DeltaBackground_vs_K_for_strains_outside_2SD_K.html",sep="") - saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - - #get the background strain mean values at the no drug conc to calculate shift - Background_L <- X_stats_BY_L$mean[1] - Background_K <- X_stats_BY_K$mean[1] - Background_r <- X_stats_BY_r$mean[1] - Background_AUC <- X_stats_BY_AUC$mean[1] - - #create empty plots for plotting element - p_l <- ggplot() - p_K <- ggplot() - p_r <- ggplot() - p_AUC <- ggplot() - - p_rf_l <- ggplot() - p_rf_K <- ggplot() - p_rf_r <- ggplot() - p_rf_AUC <- ggplot() - - #get only the deletion strains - X2 <- X - X2 <- X2[X2$OrfRep != "YDL227C",] - - #if set to max theoretical value, add a 1 to SM, if not, leave as 0 - #SM = Set to Max - X2$SM <- 0 - #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min - for(i in 1:length(unique(X2$Conc_Num))){ - Concentration <- unique(X2$Conc_Num)[i] - X2_temp <- X2[X2$Conc_Num == Concentration,] - if(Concentration == 0){ - X2_new <- X2_temp - print(paste("Check loop order, conc =",Concentration,sep=" ")) - } - if(Concentration > 0){ - try(X2_temp[X2_temp$l == 0 & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$SM <- 1) - try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - - #X2_temp[X2_temp$K == 0,]$K <- X_stats_ALL_K$max[i] - #X2_temp[X2_temp$r == 0,]$r <- X_stats_ALL_r$max[i] - #X2_temp[X2_temp$AUC == 0,]$AUC <- X_stats_ALL_AUC$max[i] - print(paste("Check loop order, conc =",Concentration,sep=" ")) - - X2_new <- rbind(X2_new,X2_temp) - - } - } - X2 <- X2_new - - - #get only the RF strains - X2_RF <- X - X2_RF <- X2_RF[X2_RF$OrfRep == "YDL227C",] - #if set to max theoretical value, add a 1 to SM, if not, leave as 0 - #SM = Set to Max - X2_RF$SM <- 0 - #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min - for(i in 1:length(unique(X2_RF$Conc_Num))){ - Concentration <- unique(X2_RF$Conc_Num)[i] - X2_RF_temp <- X2_RF[X2_RF$Conc_Num == Concentration,] - if(Concentration == 0){ - X2_RF_new <- X2_RF_temp - print(paste("Check loop order, conc =",Concentration,sep=" ")) - } - if(Concentration > 0){ - try(X2_RF_temp[X2_RF_temp$l == 0 & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$SM <- 1) - try(X2_RF_temp[X2_RF_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - print(paste("Check loop order, if error, refs have no L values outside theoretical max L, for REFs, conc =",Concentration,sep=" ")) - - X2_RF_new <- rbind(X2_RF_new,X2_RF_temp) - - } - } - X2_RF <- X2_RF_new - - - #######Part 4 Get the RF Z score values - #Change the OrfRep Column to include the RF strain, the Gene name and the Num. so each RF gets its own score - X2_RF$OrfRep <- paste(X2_RF$OrfRep,X2_RF$Gene,X2_RF$Num.,sep="_") - - num_genes_RF <- length(unique(X2_RF$OrfRep)) - print(num_genes_RF) - - - #create the output data.frame containing columns for each RF strain - InteractionScores_RF <- unique(X2_RF["OrfRep"]) - #InteractionScores_RF$Gene <- unique(X2$Gene) - InteractionScores_RF$Gene <- NA - InteractionScores_RF$Raw_Shift_L <- NA - InteractionScores_RF$Z_Shift_L <- NA - InteractionScores_RF$lm_Score_L <- NA - InteractionScores_RF$Z_lm_L <- NA - InteractionScores_RF$R_Squared_L <- NA - InteractionScores_RF$Sum_Z_Score_L <- NA - InteractionScores_RF$Avg_Zscore_L <- NA - InteractionScores_RF$Raw_Shift_K <- NA - InteractionScores_RF$Z_Shift_K <- NA - InteractionScores_RF$lm_Score_K <- NA - InteractionScores_RF$Z_lm_K <- NA - InteractionScores_RF$R_Squared_K <- NA - InteractionScores_RF$Sum_Z_Score_K <- NA - InteractionScores_RF$Avg_Zscore_K <- NA - InteractionScores_RF$Raw_Shift_r <- NA - InteractionScores_RF$Z_Shift_r <- NA - InteractionScores_RF$lm_Score_r <- NA - InteractionScores_RF$Z_lm_r <- NA - InteractionScores_RF$R_Squared_r <- NA - InteractionScores_RF$Sum_Z_Score_r <- NA - InteractionScores_RF$Avg_Zscore_r <- NA - InteractionScores_RF$Raw_Shift_AUC <- NA - InteractionScores_RF$Z_Shift_AUC <- NA - InteractionScores_RF$lm_Score_AUC <- NA - InteractionScores_RF$Z_lm_AUC <- NA - InteractionScores_RF$R_Squared_AUC <- NA - InteractionScores_RF$Sum_Z_Score_AUC <- NA - InteractionScores_RF$Avg_Zscore_AUC <- NA - InteractionScores_RF$NG <- NA - InteractionScores_RF$SM <- NA - - - for(i in 1:num_genes_RF){ - #get each deletion strain ORF - Gene_Sel <- unique(X2_RF$OrfRep)[i] - #extract only the current deletion strain and its data - X_Gene_Sel <- X2_RF[X2_RF$OrfRep == Gene_Sel,] - - X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm = TRUE), - median_L = median(l,na.rm = TRUE), - sd_L = sd(l,na.rm = TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm = TRUE), - median_K = median(K,na.rm = TRUE), - sd_K = sd(K,na.rm = TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm = TRUE), - median_r = median(r,na.rm = TRUE), - sd_r = sd(r,na.rm = TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm = TRUE), - median_AUC = median(AUC,na.rm = TRUE), - sd_AUC = sd(AUC,na.rm = TRUE), - se_AUC = sd_AUC / sqrt(N-1), - NG = sum(NG,na.rm=TRUE), - DB= sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - #Get shift vals - #if L is 0, that means the no growth on no drug - #if L is NA at 0, that means the spot was removed due to contamination - #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift - #otherwise calculate shift at no drug conc - if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){ - X_stats_interaction$Raw_Shift_L <- 0 - X_stats_interaction$Raw_Shift_K <- 0 - X_stats_interaction$Raw_Shift_r <- 0 - X_stats_interaction$Raw_Shift_AUC <- 0 - X_stats_interaction$Z_Shift_L <- 0 - X_stats_interaction$Z_Shift_K <- 0 - X_stats_interaction$Z_Shift_r <- 0 - X_stats_interaction$Z_Shift_AUC <- 0 - }else{ - X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L - X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K - X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r - X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC - X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1] - X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1] - X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1] - X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1] - } - - - #get WT vals - X_stats_interaction$WT_l <- X_stats_BY_L$mean - X_stats_interaction$WT_K <- X_stats_BY_K$mean - X_stats_interaction$WT_r <- X_stats_BY_r$mean - X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean - - #Get WT SD - X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd - X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd - X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd - X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd - - - #only get scores if there's growth at no drug - if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for no growth values in Z score calculation - if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #get linear model - gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction) - - #get interaction score calculated by linear model and R-squared value for the fit - gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] - r_squared_l <- summary(gene_lm_L)$r.squared - gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] - r_squared_K <- summary(gene_lm_K)$r.squared - gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] - r_squared_r <- summary(gene_lm_r)$r.squared - gene_interaction_AUC <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_AUC$coefficients[1] - r_squared_AUC <- summary(gene_lm_AUC)$r.squared - - #Get total of non removed values - Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1 - - #report the scores - InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] - InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] - - InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L - InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc) - - - InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K - InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc) - - InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r - InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1) - - InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC - InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1) - } - - if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for missing values in Z score calculatiom - if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #NA values for the next part since there's an NA or 0 at the no drug. - gene_lm_L <- NA - gene_lm_K <- NA - gene_lm_r <- NA - gene_lm_AUC <- NA - - gene_interaction_L <- NA - r_squared_l <- NA - gene_interaction_K <- NA - r_squared_K <- NA - gene_interaction_r <- NA - r_squared_r <- NA - gene_interaction_AUC <- NA - r_squared_AUC <- NA - - X_stats_interaction$Raw_Shift_L <- NA - X_stats_interaction$Raw_Shift_K <- NA - X_stats_interaction$Raw_Shift_r <- NA - X_stats_interaction$Raw_Shift_AUC <- NA - - X_stats_interaction$Z_Shift_L <- NA - X_stats_interaction$Z_Shift_K <- NA - X_stats_interaction$Z_Shift_r <- NA - X_stats_interaction$Z_Shift_AUC <- NA - - InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] - InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] - - InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L - InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - - InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K - InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - - InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r - InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - - InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC - InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - } - - if(i == 1){ - X_stats_interaction_ALL_RF <- X_stats_interaction - } - if(i > 1){ - X_stats_interaction_ALL_RF <- rbind(X_stats_interaction_ALL_RF,X_stats_interaction) - } - - InteractionScores_RF$NG[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE) - InteractionScores_RF$DB[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE) - InteractionScores_RF$SM[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE) - - #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int) - - - } - print("Pass RF Calculation loop") - - lm_sd_L <- sd(InteractionScores_RF$lm_Score_L,na.rm=TRUE) - lm_sd_K <- sd(InteractionScores_RF$lm_Score_K,na.rm=TRUE) - lm_sd_r <- sd(InteractionScores_RF$lm_Score_r,na.rm=TRUE) - lm_sd_AUC <- sd(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) - - lm_mean_L <- mean(InteractionScores_RF$lm_Score_L,na.rm=TRUE) - lm_mean_K <- mean(InteractionScores_RF$lm_Score_K,na.rm=TRUE) - lm_mean_r <- mean(InteractionScores_RF$lm_Score_r,na.rm=TRUE) - lm_mean_AUC <- mean(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) - - print(paste("Mean RF linear regression score L",lm_mean_L)) - - - InteractionScores_RF$Z_lm_L <- (InteractionScores_RF$lm_Score_L - lm_mean_L)/(lm_sd_L) - InteractionScores_RF$Z_lm_K <- (InteractionScores_RF$lm_Score_K - lm_mean_K)/(lm_sd_K) - InteractionScores_RF$Z_lm_r <- (InteractionScores_RF$lm_Score_r - lm_mean_r)/(lm_sd_r) - InteractionScores_RF$Z_lm_AUC <- (InteractionScores_RF$lm_Score_AUC - lm_mean_AUC)/(lm_sd_AUC) - - - InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$Z_lm_L,decreasing=TRUE),] - InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$NG,decreasing=TRUE),] - write.csv(InteractionScores_RF,paste(outputpath,"RF_ZScores_Interaction.csv",sep=""),row.names=FALSE) - - - for(i in 1:num_genes_RF){ - Gene_Sel <- unique(InteractionScores_RF$OrfRep)[i] - X_ZCalculations <- X_stats_interaction_ALL_RF[X_stats_interaction_ALL_RF$OrfRep == Gene_Sel,] - X_Int_Scores <- InteractionScores_RF[InteractionScores_RF$OrfRep == Gene_Sel,] - - p_rf_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg Zscore =",round(X_Int_Scores$Avg_Zscore_L,2))) + - annotate("text",x=1,y=25,label = paste("lm Zscore =",round(X_Int_Scores$Z_lm_L,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_rf_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) + - annotate("text",x=1,y=25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_L,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_rf_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-0.65,0.65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) + - annotate("text",x=1,y=0.25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_r,2))) + - annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) + - theme_Publication() - - p_rf_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-6500,6500)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) + - annotate("text",x=1,y=2500,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_AUC,2))) + - annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) + - theme_Publication() - - - - if(i == 1){ - X_stats_interaction_ALL_RF_final <- X_ZCalculations - } - if(i > 1){ - X_stats_interaction_ALL_RF_final <- rbind(X_stats_interaction_ALL_RF_final,X_ZCalculations) - } - } - print("Pass RF ggplot loop") - write.csv(X_stats_interaction_ALL_RF_final,paste(outputpath,"RF_ZScore_Calculations.csv",sep=""),row.names = FALSE) - - - ####### Part 5 - Get Zscores for Gene deletion strains - - - - #get total number of genes for the next loop - num_genes <- length(unique(X2$OrfRep)) - print(num_genes) - - #create the output data.frame containing columns for each deletion strain - InteractionScores <- unique(X2["OrfRep"]) - #InteractionScores$Gene <- unique(X2$Gene) - InteractionScores$Gene <- NA - InteractionScores$Raw_Shift_L <- NA - InteractionScores$Z_Shift_L <- NA - InteractionScores$lm_Score_L <- NA - InteractionScores$Z_lm_L <- NA - InteractionScores$R_Squared_L <- NA - InteractionScores$Sum_Z_Score_L <- NA - InteractionScores$Avg_Zscore_L <- NA - InteractionScores$Raw_Shift_K <- NA - InteractionScores$Z_Shift_K <- NA - InteractionScores$lm_Score_K <- NA - InteractionScores$Z_lm_K <- NA - InteractionScores$R_Squared_K <- NA - InteractionScores$Sum_Z_Score_K <- NA - InteractionScores$Avg_Zscore_K <- NA - InteractionScores$Raw_Shift_r <- NA - InteractionScores$Z_Shift_r <- NA - InteractionScores$lm_Score_r <- NA - InteractionScores$Z_lm_r <- NA - InteractionScores$R_Squared_r <- NA - InteractionScores$Sum_Z_Score_r <- NA - InteractionScores$Avg_Zscore_r <- NA - InteractionScores$Raw_Shift_AUC <- NA - InteractionScores$Z_Shift_AUC <- NA - InteractionScores$lm_Score_AUC <- NA - InteractionScores$Z_lm_AUC <- NA - InteractionScores$R_Squared_AUC <- NA - InteractionScores$Sum_Z_Score_AUC <- NA - InteractionScores$Avg_Zscore_AUC <- NA - InteractionScores$NG <- NA - InteractionScores$DB <- NA - InteractionScores$SM <- NA - - for(i in 1:num_genes){ - #get each deletion strain ORF - Gene_Sel <- unique(X2$OrfRep)[i] - #extract only the current deletion strain and its data - X_Gene_Sel <- X2[X2$OrfRep == Gene_Sel,] - - X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm = TRUE), - median_L = median(l,na.rm = TRUE), - sd_L = sd(l,na.rm = TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm = TRUE), - median_K = median(K,na.rm = TRUE), - sd_K = sd(K,na.rm = TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm = TRUE), - median_r = median(r,na.rm = TRUE), - sd_r = sd(r,na.rm = TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm = TRUE), - median_AUC = median(AUC,na.rm = TRUE), - sd_AUC = sd(AUC,na.rm = TRUE), - se_AUC = sd_AUC / sqrt(N-1), - NG = sum(NG,na.rm=TRUE), - DB= sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - #Get shift vals - #if L is 0, that means the no growth on no drug - #if L is NA at 0, that means the spot was removed due to contamination - #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift - #otherwise calculate shift at no drug conc - if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){ - X_stats_interaction$Raw_Shift_L <- 0 - X_stats_interaction$Raw_Shift_K <- 0 - X_stats_interaction$Raw_Shift_r <- 0 - X_stats_interaction$Raw_Shift_AUC <- 0 - X_stats_interaction$Z_Shift_L <- 0 - X_stats_interaction$Z_Shift_K <- 0 - X_stats_interaction$Z_Shift_r <- 0 - X_stats_interaction$Z_Shift_AUC <- 0 - }else{ - X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L - X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K - X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r - X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC - X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1] - X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1] - X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1] - X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1] - } - - - #get WT vals - X_stats_interaction$WT_l <- X_stats_BY_L$mean - X_stats_interaction$WT_K <- X_stats_BY_K$mean - X_stats_interaction$WT_r <- X_stats_BY_r$mean - X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean - - #Get WT SD - X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd - X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd - X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd - X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd - - - #only get scores if there's growth at no drug - if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for no growth values in Z score calculation - if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #get linear model - gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction) - - #get interaction score calculated by linear model and R-squared value for the fit - gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] - r_squared_l <- summary(gene_lm_L)$r.squared - gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] - r_squared_K <- summary(gene_lm_K)$r.squared - gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] - r_squared_r <- summary(gene_lm_r)$r.squared - gene_interaction_AUC <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_AUC$coefficients[1] - r_squared_AUC <- summary(gene_lm_AUC)$r.squared - - #Get total of non removed values - Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1 - - #report the scores - InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1]) - InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1]) - - InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_L - InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_L-lm_mean_L)/lm_sd_L - InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE) - InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc) - - - InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_K - InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_K-lm_mean_K)/lm_sd_K - InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE) - InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc) - - InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_r - InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_r-lm_mean_r)/lm_sd_r - InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE) - InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1) - - InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_AUC - InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_AUC-lm_mean_AUC)/lm_sd_AUC - InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE) - InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1) - } - - if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for missing values in Z score calculatiom - if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #NA values for the next part since there's an NA or 0 at the no drug. - gene_lm_L <- NA - gene_lm_K <- NA - gene_lm_r <- NA - - gene_interaction_L <- NA - r_squared_l <- NA - gene_interaction_K <- NA - r_squared_K <- NA - gene_interaction_r <- NA - r_squared_r <- NA - - X_stats_interaction$Raw_Shift_L <- NA - X_stats_interaction$Raw_Shift_K <- NA - X_stats_interaction$Raw_Shift_r <- NA - X_stats_interaction$Raw_Shift_AUC <- NA - - X_stats_interaction$Z_Shift_L <- NA - X_stats_interaction$Z_Shift_K <- NA - X_stats_interaction$Z_Shift_r <- NA - X_stats_interaction$Z_Shift_AUC <- NA - - InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1]) - InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1]) - - InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- NA - - InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <-NA - InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- NA - - InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- NA - - InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - } - - if(i == 1){ - X_stats_interaction_ALL <- X_stats_interaction - } - if(i > 1){ - X_stats_interaction_ALL <- rbind(X_stats_interaction_ALL,X_stats_interaction) - } - - InteractionScores$NG[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE) - InteractionScores$DB[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE) - InteractionScores$SM[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE) - - #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int) - - - } - print("Pass Int Calculation loop") - InteractionScores <- InteractionScores[order(InteractionScores$Z_lm_L,decreasing=TRUE),] - InteractionScores <- InteractionScores[order(InteractionScores$NG,decreasing=TRUE),] - df_order_by_OrfRep <- unique(InteractionScores$OrfRep) - #X_stats_interaction_ALL <- X_stats_interaction_ALL[order(X_stats_interaction_ALL$NG,decreasing=TRUE),] - write.csv(InteractionScores,paste(outputpath,"ZScores_Interaction.csv",sep=""),row.names=FALSE) - - InteractionScores_deletion_enhancers_L <- InteractionScores[InteractionScores$Avg_Zscore_L >= 2,] - InteractionScores_deletion_enhancers_K <- InteractionScores[InteractionScores$Avg_Zscore_K <= -2,] - InteractionScores_deletion_suppressors_L <- InteractionScores[InteractionScores$Avg_Zscore_L <= -2,] - InteractionScores_deletion_suppressors_K <- InteractionScores[InteractionScores$Avg_Zscore_K >= 2,] - InteractionScores_deletion_enhancers_and_Suppressors_L <- InteractionScores[InteractionScores$Avg_Zscore_L >= 2 | InteractionScores$Avg_Zscore_L <= -2,] - InteractionScores_deletion_enhancers_and_Suppressors_K <- InteractionScores[InteractionScores$Avg_Zscore_K >= 2 | InteractionScores$Avg_Zscore_K <= -2,] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- InteractionScores[InteractionScores$Z_lm_L >= 2 & InteractionScores$Avg_Zscore_L <= -2,] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- InteractionScores[InteractionScores$Z_lm_L <= -2 & InteractionScores$Avg_Zscore_L >= 2,] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- InteractionScores[InteractionScores$Z_lm_K <= -2 & InteractionScores$Avg_Zscore_K >= 2,] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- InteractionScores[InteractionScores$Z_lm_K >= 2 & InteractionScores$Avg_Zscore_K <= -2,] - - InteractionScores_deletion_enhancers_L <- InteractionScores_deletion_enhancers_L[!is.na(InteractionScores_deletion_enhancers_L$OrfRep),] - InteractionScores_deletion_enhancers_K <- InteractionScores_deletion_enhancers_K[!is.na(InteractionScores_deletion_enhancers_K$OrfRep),] - InteractionScores_deletion_suppressors_L <- InteractionScores_deletion_suppressors_L[!is.na(InteractionScores_deletion_suppressors_L$OrfRep),] - InteractionScores_deletion_suppressors_K <- InteractionScores_deletion_suppressors_K[!is.na(InteractionScores_deletion_suppressors_K$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_L <- InteractionScores_deletion_enhancers_and_Suppressors_L[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_K <- InteractionScores_deletion_enhancers_and_Suppressors_K[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K$OrfRep),] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L[!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L$OrfRep),] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L[!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L$OrfRep),] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K[!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K$OrfRep),] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K[!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K$OrfRep),] - - write.csv(InteractionScores_deletion_enhancers_L,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_K,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_L,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_K,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L,paste(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L,paste(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K,paste(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K,paste(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv",sep=""),row.names=FALSE) - - #get enhancers and suppressors for linear regression - InteractionScores_deletion_enhancers_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2,] - InteractionScores_deletion_enhancers_K_lm <- InteractionScores[InteractionScores$Z_lm_K <= -2,] - InteractionScores_deletion_suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L <= -2,] - InteractionScores_deletion_suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2,] - InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2 | InteractionScores$Z_lm_L <= -2,] - InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2 | InteractionScores$Z_lm_K <= -2,] - - InteractionScores_deletion_enhancers_L_lm <- InteractionScores_deletion_enhancers_L_lm[!is.na(InteractionScores_deletion_enhancers_L_lm$OrfRep),] - InteractionScores_deletion_enhancers_K_lm <- InteractionScores_deletion_enhancers_K_lm[!is.na(InteractionScores_deletion_enhancers_K_lm$OrfRep),] - InteractionScores_deletion_suppressors_L_lm <- InteractionScores_deletion_suppressors_L_lm[!is.na(InteractionScores_deletion_suppressors_L_lm$OrfRep),] - InteractionScores_deletion_suppressors_K_lm <- InteractionScores_deletion_suppressors_K_lm[!is.na(InteractionScores_deletion_suppressors_K_lm$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores_deletion_enhancers_and_Suppressors_L_lm[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L_lm$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores_deletion_enhancers_and_Suppressors_K_lm[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K_lm$OrfRep),] - - write.csv(InteractionScores_deletion_enhancers_L_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_L_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_K_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_K_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_L_lm,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_L_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_K_lm,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_K_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv",sep=""),row.names=FALSE) - - - write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) - print('ln 1570 write StudyInfo.csv after ') - #write.table(Labels,file=paste("../Code/StudyInfo.txt"),sep="\t",row.names = FALSE) - - for(i in 1:num_genes){ - Gene_Sel <- unique(InteractionScores$OrfRep)[i] - X_ZCalculations <- X_stats_interaction_ALL[X_stats_interaction_ALL$OrfRep == Gene_Sel,] - X_Int_Scores <- InteractionScores[InteractionScores$OrfRep == Gene_Sel,] - - p_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_L,2))) + - annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_L,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) + - annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_K,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-0.65,0.65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) + - annotate("text",x=1,y=0.25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_r,2))) + - annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) + - theme_Publication() - - p_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-6500,6500)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) + - annotate("text",x=1,y=2500,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_AUC,2))) + - annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) + - theme_Publication() - - - - if(i == 1){ - X_stats_interaction_ALL_final <- X_ZCalculations - } - if(i > 1){ - X_stats_interaction_ALL_final <- rbind(X_stats_interaction_ALL_final,X_ZCalculations) - } - } - print("Pass Int ggplot loop") - write.csv(X_stats_interaction_ALL_final,paste(outputpath,"ZScore_Calculations.csv",sep=""),row.names = FALSE) - - - - - - - Blank <- ggplot(X2_RF) + geom_blank() - - pdf(paste(outputpath,"InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE) - - X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise, - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_AUC = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - NG = sum(NG,na.rm=TRUE), - DB = sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - L_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,l)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + - annotate("text",x=-0.25,y=10,label="NG") + - annotate("text",x=-0.25,y=5,label="DB") + - annotate("text",x=-0.25,y=0,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=0,label=X_stats_X2_RF$SM) + - theme_Publication() - - K_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,K)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(-20,160)) + - annotate("text",x=-0.25,y=-5,label="NG") + - annotate("text",x=-0.25,y=-12.5,label="DB") + - annotate("text",x=-0.25,y=-20,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-5,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-12.5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-20,label=X_stats_X2_RF$SM) + - theme_Publication() - - R_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,r)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - annotate("text",x=-0.25,y=.9,label="NG") + - annotate("text",x=-0.25,y=.8,label="DB") + - annotate("text",x=-0.25,y=.7,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.9,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.8,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.7,label=X_stats_X2_RF$SM) + - theme_Publication() - - AUC_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,AUC)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + - annotate("text",x=-0.25,y=11000,label="NG") + - annotate("text",x=-0.25,y=10000,label="DB") + - annotate("text",x=-0.25,y=9000,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=11000,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10000,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=9000,label=X_stats_X2_RF$SM) + - theme_Publication() - - - L_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + - theme_Publication() - - K_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + - theme_Publication() - - r_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - theme_Publication() - - AUC_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + - theme_Publication() - - - grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2) - grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2) - - - - #plot the references - #grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4) - #grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2) - - #loop for grid arrange 4x3 - j <- rep(1,((num_genes)/3)-1) - for(n in 1:length(j)){ - j[n+1] <- n*3 + 1 - } - #loop for printing each plot - num <- 0 - for(m in 1:(round((num_genes)/3)-1)){ - num <- j[m] - grid.arrange(p_l[[num]],p_K[[num]],p_r[[num]],p_AUC[[num]],p_l[[num+1]],p_K[[num+1]],p_r[[num+1]],p_AUC[[num+1]],p_l[[num+2]],p_K[[num+2]],p_r[[num+2]],p_AUC[[num+2]],ncol=4,nrow=3) - #grid.arrange(p_l[[364]],p_K[[364]],p_r[[364]],p_l[[365]],p_K[[365]],p_r[[365]],p_l[[366]],p_K[[366]],p_r[[366]],ncol=3,nrow=3) - - - #p1[[num+3]],p_K[[num+3]],p_r[[num+3]],p1[[num+4]],p_K[[num+4]],p_r[[num+4]] - } - if(num_genes != (num+2)){ - total_num = num_genes - (num+2) - if(total_num == 5){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_AUC[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],p_AUC[[num+7]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - if(total_num == 4){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_AUC[[num+6]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - - if(total_num == 3){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3) - #grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 2){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 1){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - } - dev.off() - - - - pdf(paste(outputpath,"RF_InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE) - - X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise, - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_AUC = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - NG = sum(NG,na.rm=TRUE), - DB = sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - L_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,l)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + - annotate("text",x=-0.25,y=10,label="NG") + - annotate("text",x=-0.25,y=5,label="DB") + - annotate("text",x=-0.25,y=0,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=0,label=X_stats_X2_RF$SM) + - theme_Publication() - - K_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,K)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(-20,160)) + - annotate("text",x=-0.25,y=-5,label="NG") + - annotate("text",x=-0.25,y=-12.5,label="DB") + - annotate("text",x=-0.25,y=-20,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-5,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-12.5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-20,label=X_stats_X2_RF$SM) + - theme_Publication() - - R_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,r)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - annotate("text",x=-0.25,y=.9,label="NG") + - annotate("text",x=-0.25,y=.8,label="DB") + - annotate("text",x=-0.25,y=.7,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.9,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.8,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.7,label=X_stats_X2_RF$SM) + - theme_Publication() - - AUC_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,AUC)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + - annotate("text",x=-0.25,y=11000,label="NG") + - annotate("text",x=-0.25,y=10000,label="DB") + - annotate("text",x=-0.25,y=9000,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=11000,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10000,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=9000,label=X_stats_X2_RF$SM) + - theme_Publication() - - - L_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + - theme_Publication() - - K_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + - theme_Publication() - - r_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - theme_Publication() - - AUC_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(12000,0)) + - theme_Publication() - - - grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2) - grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2) - - - - #plot the references - #grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4) - #grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2) - - #loop for grid arrange 4x3 - j <- rep(1,((num_genes_RF)/3)-1) - for(n in 1:length(j)){ - j[n+1] <- n*3 + 1 - } - #loop for printing each plot - num <- 0 - for(m in 1:(round((num_genes_RF)/3)-1)){ - num <- j[m] - grid.arrange(p_rf_l[[num]],p_rf_K[[num]],p_rf_r[[num]],p_rf_AUC[[num]],p_rf_l[[num+1]],p_rf_K[[num+1]],p_rf_r[[num+1]],p_rf_AUC[[num+1]],p_rf_l[[num+2]],p_rf_K[[num+2]],p_rf_r[[num+2]],p_rf_AUC[[num+2]],ncol=4,nrow=3) - #grid.arrange(p_rf_l[[364]],p_rf_K[[364]],p_rf_r[[364]],p_rf_l[[365]],p_rf_K[[365]],p_rf_r[[365]],p_rf_l[[366]],p_rf_K[[366]],p_rf_r[[366]],ncol=3,nrow=3) - - - #p1[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p1[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]] - } - if(num_genes_RF != (num+2)){ - total_num = num_genes_RF - (num+2) - if(total_num == 5){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_AUC[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],p_rf_AUC[[num+7]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - if(total_num == 4){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_AUC[[num+6]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - - if(total_num == 3){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3) - #grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 2){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 1){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - } - dev.off() - - #print rank plots for L and K gene interactions - - - InteractionScores_AdjustMissing <- InteractionScores - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_L),]$Avg_Zscore_L <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_K),]$Avg_Zscore_K <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_r),]$Avg_Zscore_r <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_AUC),]$Avg_Zscore_AUC <- 0.001 - - InteractionScores_AdjustMissing$L_Rank <- NA - InteractionScores_AdjustMissing$K_Rank <- NA - InteractionScores_AdjustMissing$r_Rank <- NA - InteractionScores_AdjustMissing$AUC_Rank <- NA - - InteractionScores_AdjustMissing$L_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_L) - InteractionScores_AdjustMissing$K_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_K) - InteractionScores_AdjustMissing$r_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_r) - InteractionScores_AdjustMissing$AUC_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_AUC) - - # - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_L),]$Z_lm_L <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_K),]$Z_lm_K <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_r),]$Z_lm_r <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_AUC),]$Z_lm_AUC <- 0.001 - - InteractionScores_AdjustMissing$L_Rank_lm <- NA - InteractionScores_AdjustMissing$K_Rank_lm <- NA - InteractionScores_AdjustMissing$r_Rank_lm <- NA - InteractionScores_AdjustMissing$AUC_Rank_lm <- NA - - InteractionScores_AdjustMissing$L_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_L) - InteractionScores_AdjustMissing$K_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_K) - InteractionScores_AdjustMissing$r_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_r) - InteractionScores_AdjustMissing$AUC_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_AUC) - - - - Rank_L_1SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - pdf(paste(outputpath,"RankPlots.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2) - - dev.off() - - - Rank_L_1SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - pdf(paste(outputpath,"RankPlots_lm.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2) - - dev.off() - - - - X_NArm <- InteractionScores[!is.na(InteractionScores$Z_lm_L) | !is.na(InteractionScores$Avg_Zscore_L) ,] - - #find overlaps - X_NArm$Overlap <- "No Effect" - try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Enhancer Both") - try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Suppressor Both") - try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= 2,]$Overlap <- "Deletion Enhancer lm only") - try(X_NArm[X_NArm$Z_lm_L <= 2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Enhancer Avg Zscore only") - try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= -2,]$Overlap <- "Deletion Suppressor lm only") - try(X_NArm[X_NArm$Z_lm_L >= -2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Suppressor Avg Zscore only") - try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Enhancer lm, Deletion Suppressor Avg Z score") - try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Suppressor lm, Deletion Enhancer Avg Z score") - - #get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 - get_lm_L <- lm(X_NArm$Z_lm_L~X_NArm$Avg_Zscore_L) - L_lm <- summary(get_lm_L) - - get_lm_K <- lm(X_NArm$Z_lm_K~X_NArm$Avg_Zscore_K) - K_lm <- summary(get_lm_K) - - get_lm_r <- lm(X_NArm$Z_lm_r~X_NArm$Avg_Zscore_r) - r_lm <- summary(get_lm_r) - - get_lm_AUC <- lm(X_NArm$Z_lm_AUC~X_NArm$Avg_Zscore_AUC) - AUC_lm <- summary(get_lm_AUC) - - pdf(paste(outputpath,"Avg_Zscore_vs_lm_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) - - print(ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm L") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(Avg_Zscore_K,Z_lm_K)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm K") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(K_lm$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(Avg_Zscore_r,Z_lm_r)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm r") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(r_lm$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(Avg_Zscore_AUC,Z_lm_AUC)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm AUC") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(AUC_lm$r.squared,2))) + theme_Publication_legend_right()) - - dev.off() - - - lm_v_Zscore_L <- ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L,ORF=OrfRep,Gene=Gene,NG=NG,SM=SM,DB=DB)) + geom_point(aes(color=Overlap),shape=3) + - geom_smooth(method = "lm",color=1) + ggtitle("Avg Zscore vs lm L") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right() - - pgg <- ggplotly(lm_v_Zscore_L) - #pgg - plotly_path <- paste(getwd(),"/",outputpath,"Avg_Zscore_vs_lm_NA_rm.html",sep="") - saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - X_NArm$L_Rank <- rank(X_NArm$Avg_Zscore_L) - X_NArm$K_Rank <- rank(X_NArm$Avg_Zscore_K) - X_NArm$r_Rank <- rank(X_NArm$Avg_Zscore_r) - X_NArm$AUC_Rank <- rank(X_NArm$Avg_Zscore_AUC) - - X_NArm$L_Rank_lm <- rank(X_NArm$Z_lm_L) - X_NArm$K_Rank_lm <- rank(X_NArm$Z_lm_K) - X_NArm$r_Rank_lm <- rank(X_NArm$Z_lm_r) - X_NArm$AUC_Rank_lm <- rank(X_NArm$Z_lm_AUC) - - #get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 - get_lm_L2 <- lm(X_NArm$L_Rank_lm~X_NArm$L_Rank) - L_lm2 <- summary(get_lm_L2) - - get_lm_K2 <- lm(X_NArm$K_Rank_lm~X_NArm$K_Rank) - K_lm2 <- summary(get_lm_K2) - - get_lm_r2 <- lm(X_NArm$r_Rank_lm~X_NArm$r_Rank) - r_lm2 <- summary(get_lm_r2) - - get_lm_AUC2 <- lm(X_NArm$AUC_Rank_lm~X_NArm$AUC_Rank) - AUC_lm2 <- summary(get_lm_AUC2) - - num_genes_NArm2 <- (dim(X_NArm)[1])/2 - - pdf(paste(outputpath,"Avg_Zscore_vs_lm_ranked_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) - - print(ggplot(X_NArm,aes(L_Rank,L_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank Avg Zscore vs lm L") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(L_lm2$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(K_Rank,K_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank Avg Zscore vs lm K") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(K_lm2$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(r_Rank,r_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank Avg Zscore vs lm r") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(r_lm2$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(AUC_Rank,AUC_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank of Avg Zscore vs lm AUC") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(AUC_lm2$r.squared,2))) + theme_Publication_legend_right()) - - - - dev.off() - - - - Rank_L_1SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - pdf(paste(outputpath,"RankPlots_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2) - - dev.off() - - - Rank_L_1SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - pdf(paste(outputpath,"RankPlots_lm_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2) - - dev.off() - -} - - - -#get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 -get_lm_1 <- lm(X_NArm$Z_lm_K~X_NArm$Z_lm_L) -L_lm_1 <- summary(get_lm_1) - -get_lm_2 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_L) -L_lm_2 <- summary(get_lm_2) - -get_lm_3 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_L) -L_lm_3 <- summary(get_lm_3) - -get_lm_4 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_K) -L_lm_4 <- summary(get_lm_4) - -get_lm_5 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_K) -L_lm_5 <- summary(get_lm_5) - -get_lm_6 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_r) -L_lm_6 <- summary(get_lm_6) - - -pdf(file=paste(outputpath,"Correlation_CPPs.pdf",sep=""),width = 10, height = 7, onefile = TRUE) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_K)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction L vs. Interaction K") + - xlab("z-score L") + ylab("z-score K") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_1$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction L vs. Interaction r") + - xlab("z-score L") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_2$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction L vs. Interaction AUC") + - xlab("z-score L") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_3$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction K vs. Interaction r") + - xlab("z-score K") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_4$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction K vs. Interaction AUC") + - xlab("z-score K") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_5$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_r,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction r vs. Interaction AUC") + - xlab("z-score r") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_6$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -InteractionScores_RF2 <- InteractionScores_RF[!is.na(InteractionScores_RF$Z_lm_L),] -ggplot(X_NArm,aes(Z_lm_L,Z_lm_K)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_K),color="cyan") + - ggtitle("Interaction L vs. Interaction K") + - xlab("z-score L") + ylab("z-score K") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_1$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_r),color="cyan") + - ggtitle("Interaction L vs. Interaction r") + - xlab("z-score L") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_2$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_AUC),color="cyan") + - ggtitle("Interaction L vs. Interaction AUC") + - xlab("z-score L") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_3$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_K,Z_lm_r),color="cyan") + - ggtitle("Interaction K vs. Interaction r") + - xlab("z-score K") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_4$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_K,Z_lm_AUC),color="cyan") + - ggtitle("Interaction K vs. Interaction AUC") + - xlab("z-score K") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_5$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_r,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_r,Z_lm_AUC),color="cyan") + - ggtitle("Interaction r vs. Interaction AUC") + - xlab("z-score r") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_6$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - - - - - -dev.off() - - -#write.csv(Labels,file=paste("../Code/Parameters.csv"),row.names = FALSE) -timestamp() - -#BoneYard*********************************************** -#I'm thinking this parameter needs to be save somewhere "permanent' for the record so outputs can be reproduced. -#take this out of the Arguments. In Matlab I could for future in .mat file. Maybe I could save the SD Args[2] as part of the StudyInfo.txt. -#Corruptable but better than nothing. -#if(is.na(Args[2])){ -# std=3 -#}else { -# std= Arg[2] -#Delta_Background_sdFactor <- 2 #Args[3] -#DelBGFactr <- as.numeric(Delta_Background_sdFactor) -#} - diff --git a/workflow/templates/qhtcp/Exp3/.DS_Store b/workflow/templates/qhtcp/Exp3/.DS_Store deleted file mode 100644 index ccdc65b6..00000000 Binary files a/workflow/templates/qhtcp/Exp3/.DS_Store and /dev/null differ diff --git a/workflow/templates/qhtcp/Exp3/.Rhistory b/workflow/templates/qhtcp/Exp3/.Rhistory deleted file mode 100644 index e69de29b..00000000 diff --git a/workflow/templates/qhtcp/Exp3/ExpFrontend.m b/workflow/templates/qhtcp/Exp3/ExpFrontend.m deleted file mode 100644 index 2bdc98c3..00000000 --- a/workflow/templates/qhtcp/Exp3/ExpFrontend.m +++ /dev/null @@ -1,53 +0,0 @@ -%FrontEnd utility to copy source result sheet into Exp_ folders of -%StudiesQHTCP/StudyName/Exp1(2,3,4). This allow the automation of path -%capture to the StudiesDataArchieve.txt study log. -%Select, copy and Capture Study Exp_ details to study log - -%Exp meta data collection -W=pwd; -%Load matlab stored data file into workspace -try -ExpLabel= strcat('Exp',W(end)) -questdlg('\fontsize{20} Select the !!Results File','File Selection','OK', struct('Default','OK','Interpreter','tex')); -[resFile,resPath]= uigetfile('*.txt') -copyfile((fullfile(resPath,resFile)),fullfile(W)) -resDate= char(regexp(resFile, '(\d\d\_\d\d\d\d)|( \d\d\_\d\d\d\d|\d\d\d\d\d\d)','match')) -cd .. -Wstudy= pwd -studyDate= datetime("today"); -S.sDate(1) = {studyDate}; -if ispc - lastSep=max(strfind(Wstudy,'\')) - studyName= Wstudy((lastSep+1):end) -else - lastSep=max(strfind(Wstudy,'/')) - studyName= Wstudy((lastSep+1):end) -end - -S.sDate(1)= {studyDate}; -S.sName(1)= {studyName} -S.sPath(1)= {Wstudy} -S.ELabel(1)= {ExpLabel} -S.EresDate(1)= {resDate} -S.EresFile(1)= {resFile} -S.EresPath(1)= {resPath} - -cd .. - -fid = fopen('StudiesDataArchive.txt','a'); -fprintf(fid,'StudyDate\tStudyName\tStudyPath\tExpNum\tExpDate\tExpPath\tResultFile\n'); -fprintf(fid, '%s\t %s\t %s\t %s\t %s\t %s\t %s \n',S.sDate{1},S.sName{1},S.sPath{1},S.ELabel{1},S.EresDate{1},S.EresPath{1},S.EresFile{1}); -fclose(fid); -%save((fullfile(pwd,'studyLog.mat')), 'S') -fclose('all'); - -catch - cd(W) - disp('Error: Unable to Execute ExpFrontend.m') -end - - - - - - diff --git a/workflow/templates/qhtcp/Exp3/NotesExp3 b/workflow/templates/qhtcp/Exp3/NotesExp3 deleted file mode 100644 index e69de29b..00000000 diff --git a/workflow/templates/qhtcp/Exp3/Z_InteractionTemplate.R b/workflow/templates/qhtcp/Exp3/Z_InteractionTemplate.R deleted file mode 100644 index fe31ca14..00000000 --- a/workflow/templates/qhtcp/Exp3/Z_InteractionTemplate.R +++ /dev/null @@ -1,2730 +0,0 @@ -#Based on InteractionTemplate.R which is based on Sean Santose's Interaction_V5 script. -#Adapt SS For Structured Data storage but using command line scripts -###Set up the required libraries, call required plot theme elements and set up the command line arguments -library("ggplot2") -library("plyr") -library("extrafont") -library("gridExtra") -library("gplots") -library("RColorBrewer") -library("stringr") -#library("gdata") -library(plotly) -library(htmlwidgets) - -Args <- commandArgs(TRUE) -input_file <- Args[1] #"!!Results_17_0827_yor1null-rpl12anull misLabeledAsFrom MI 17_0919_yor1-curated.txt" #Args[1] #Arg 1 #"!!ResultsStd_JS 19_1224_HLEG_P53.txt" is the !!results ... .txt -#Tool to find a file and copy it to desired location -destDir= getwd() -#srcFile= file.choose() -#file.copy(srcFile, destDir) -#input_file= tail(strsplit(srcFile,"[/]")[[1]],1) - - - -#Path to Output Directory -#W=getwd() #R is F'd up, Can't use, Any legitamate platform could build out dirs from this -outDir <- "ZScores/" #"Args[2] #paste0(W,"/ZScores/") -subDir <- outDir #Args[2] - -if (file.exists(subDir)){ - outputpath <- subDir -} else { - dir.create(file.path(subDir)) -} - -if (file.exists(paste(subDir,"QC/",sep=""))){ - outputpath_QC <- paste(subDir,"QC/",sep="") -} else { - dir.create(file.path(paste(subDir,"QC/",sep=""))) - outputpath_QC <- paste(subDir,"QC/",sep="") -} -#define the output path (formerly the second argument from Rscript) -outputpath <- outDir - -#Set Args[2] the Background contamination noise filter as a function of standard deviation -#std= as.numeric(Args[2]) -#Sean recommends 3 or 5 SD factor. -#Capture Exp_ number,use it to Save Args[2]{std}to Labels field and then Write to Labels to studyInfo.txt for future reference -Labels <- read.csv(file= "../Code/StudyInfo.csv",stringsAsFactors = FALSE) #,sep= ",") -print("Be sure to enter Background noise filter standard deviation i.e., 3 or 5 per Sean") - -#User prompt for std multiplier Value -cat("Enter a Standard Deviation value to noise filter \n") -inpChar<- readLines(file("stdin"), n = 1L) -cat(paste("Standard Deviation Value is", inpChar, "\n")) -inpNum= as.numeric(inpChar) -#set std deviation multiplier default if no user entry -if(!is.na(inpNum)){ - std= inpNum -}else{std= 3} - - -expNumber<- as.numeric(sub("^.*?(\\d+)$", "\\1", getwd())) -Labels[expNumber,3]= as.numeric(std) -Delta_Background_sdFactor <- std -DelBGFactr <- as.numeric(Delta_Background_sdFactor) -#Write Background SD value to studyInfo.txt file -#write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) -write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) -print('ln 50 write StudyInfo.csv ') -#write.table(Labels,file=paste(outputpath,"StudyInfo.txt"),sep = "\t",row.names = FALSE) - -#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -#++++++BEGIN USER DATA SELECTION SECTION+++++++++++++++++++++++++++++++++++++++++++++++++ - -#read in the data -X <- read.delim(input_file,skip=2,as.is=T,row.names=1,strip.white=TRUE) -X <- X[!(X[[1]]%in%c("","Scan")),] -#X <- X[!(X[[1]]%in%c(61:76)),] #Remove dAmp plates which are Scans 61 thru 76 - -#X <- X[which(X$Specifics == "WT"),] - -#X_length <- length(X[1,]) -#X_end <- length(X[1,]) - 2 -#X <- X[,c(1:42,X_end:X_length)] - - -#use numeric data to perform operations -X$Col <- as.numeric(X$Col) -X$Row <- as.numeric(X$Row) -X$l <- as.numeric(X$l) -X$K <- as.numeric(X$K) -X$r <- as.numeric(X$r) -X$Scan <- as.numeric(X$Scan) -X$AUC <- as.numeric(X$AUC) -X$LstBackgrd <- as.numeric(X$LstBackgrd) -X$X1stBackgrd <- as.numeric(X$X1stBackgrd) - -#Sometimes the an experimenter may have placed the non-varying drug in the 'Drug' col instead of the 'Modifier1' col -#as was the case in Gemcitabin and Cytarabin experiments. -#The following allows user to rename columns so as to get the appropriate -#data where it needs to be for the script to run properly. -#colnames(X)[7] <- "Modifier1" -#colnames(X)[8] <- "Conc1" -#colnames(X)[10] <- "Drug" -#colnames(X)[11] <- "Conc" - -#set the OrfRep to YDL227C for the ref data -X[X$ORF == "YDL227C",]$OrfRep <- "YDL227C" -#Sean removes the Doxycyclin at 0.0ug.mL so that only the Oligomycin series with Doxycyclin of 0.12ug/mL are used. -#That is the first DM plates are removed from the data set with the following. -#X <- X[X$Conc1 != "0ug/ml",] #This removes data with dox ==0 leaving gene expression on with four different concentrations of Gemcytabin -X <- X[X$Drug != "BMH21",] #This removes data concerning BMH21 for this experiment - -#Mert placed the"bad_spot" text in the ORF col. for particular spots in the RF1 and RF2 plates. -#This code removes those spots from the data set used for the interaction analysis. Dr.Hartman feels that these donot effect Zscores significantly and so "non-currated" files were used. -#try(X <- X[X$ORF != "bad_spot",]) -#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++= - -#get total number of drug concentrations -Total_Conc_Nums <- length(unique(X$Conc)) - -#function to ID numbers in string with characters+numbers (ie to get numeric drug conc) -numextract <- function(string){ - str_extract(string, "\\-*\\d+\\.*\\d*") -} - -#generate a new column with the numeric drug concs -X$Conc_Num <- as.numeric(numextract(X$Conc)) -#Generate new column with the numeric drug concs as factors starting at 0 for the graphing later -X$Conc_Num_Factor <- as.numeric(as.factor(X$Conc_Num)) - 1 - -#Get the max factor for concentration -MAX_CONC <- max(X$Conc_Num_Factor) -#if treating numbers not as factors uncomment next line and comment out previous line -#MAX_CONC <- max(X$Conc_Num) - -#remove wells with problems for making graphs and to not include in summary statistics -X <- X[X$Gene != "BLANK",] -X <- X[X$Gene != "Blank",] -X <- X[X$ORF != "Blank",] -X <- X[X$Gene != "blank",] -#X <- X[X$Gene != "HO",] -Xbu= X -#Inserted to use SGDgenelist to update orfs and replace empty geneName cells with ORF name (adapted from Sean's Merge script). This is to 'fix' the naming for everything that follows (REMc, Heatmaps ... et.al) rather than do it piece meal later -#Sean's Match Script( which was adapted here) was fixed 2022_0608 so as not to write over the RF1&RF2 geneNames which caused a variance with his code results -#in the Z_lm_L,K,r&AUC output values. Values correlated well but were off by a multiplier factor. -SGDgeneList= "../Code/SGD_features.tab" -genes = data.frame(read.delim(file=SGDgeneList,quote="",header=FALSE,colClasses = c(rep("NULL",3), rep("character", 2), rep("NULL", 11)))) -for(i in 1:length(X[,14])){ - ii= as.numeric(i) - line_num = match(X[ii,14],genes[,1],nomatch=1) - OrfRepColNum= as.numeric(match('OrfRep',names(X))) - if(X[ii,OrfRepColNum]!= "YDL227C"){ - X[ii,15] = genes[line_num,2] - } - if((X[ii,15] == "")||(X[ii,15] == "OCT1")){ - X[ii,15] = X[ii,OrfRepColNum] - } -} -Xblankreplace= X -#X= Xbu #for restore testing restore X if geneName 'Match' routine needs changing - -#Remove dAmPs ******************************* -#jlh confirmed to leave dAmps in so comment out this section -#DAmPs_List <- "../Code/22_0602_Remy_DAmPsList.txt" -#Damps <- read.delim(DAmPs_List,header=F) - -#X <- X[!(X$ORF %in% Damps$V1),] #fix this to Damps[,1] -#XafterDampsRM=X #Backup for debugging especially when Rstudio goes crazy out of control -# *********** - - -#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -#++++++END USER DATA SELECTION+++++++++++++++++++++++++++++++++++++++++++++++++ -#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -print("ln137 End of User Section including blank gene writeOver") -#++++Begin Graphics Boiler Plate Section+++++++++++++++++++++++++++++++++++++++ -#theme elements for plots -theme_Publication <- function(base_size=14, base_family="sans") { - library(grid) - library(ggthemes) - (theme_foundation(base_size=base_size, base_family=base_family) - + theme(plot.title = element_text(face = "bold", - size = rel(1.2), hjust = 0.5), - text = element_text(), - panel.background = element_rect(colour = NA), - plot.background = element_rect(colour = NA), - panel.border = element_rect(colour = NA), - axis.title = element_text(face = "bold",size = rel(1)), - axis.title.y = element_text(angle=90,vjust =2), - axis.title.x = element_text(vjust = -0.2), - axis.text = element_text(), - axis.line = element_line(colour="black"), - axis.ticks = element_line(), - panel.grid.major = element_line(colour="#f0f0f0"), - panel.grid.minor = element_blank(), - legend.key = element_rect(colour = NA), - legend.position = "bottom", - legend.direction = "horizontal", - legend.key.size= unit(0.2, "cm"), - legend.spacing = unit(0, "cm"), - legend.title = element_text(face="italic"), - plot.margin=unit(c(10,5,5,5),"mm"), - strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"), - strip.text = element_text(face="bold") - )) - -} - -scale_fill_Publication <- function(...){ - library(scales) - discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - -} - -scale_colour_Publication <- function(...){ - discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - -} - - -theme_Publication_legend_right <- function(base_size=14, base_family="sans") { - (theme_foundation(base_size=base_size, base_family=base_family) - + theme(plot.title = element_text(face = "bold", - size = rel(1.2), hjust = 0.5), - text = element_text(), - panel.background = element_rect(colour = NA), - plot.background = element_rect(colour = NA), - panel.border = element_rect(colour = NA), - axis.title = element_text(face = "bold",size = rel(1)), - axis.title.y = element_text(angle=90,vjust =2), - axis.title.x = element_text(vjust = -0.2), - axis.text = element_text(), - axis.line = element_line(colour="black"), - axis.ticks = element_line(), - panel.grid.major = element_line(colour="#f0f0f0"), - panel.grid.minor = element_blank(), - legend.key = element_rect(colour = NA), - legend.position = "right", - legend.direction = "vertical", - legend.key.size= unit(0.5, "cm"), - legend.spacing = unit(0, "cm"), - legend.title = element_text(face="italic"), - plot.margin=unit(c(10,5,5,5),"mm"), - strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"), - strip.text = element_text(face="bold") - )) - -} - -scale_fill_Publication <- function(...){ - discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - -} - -scale_colour_Publication <- function(...){ - discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - -} - - -#print timestamp for initial time the code starts -timestamp() -#+++++BEGIN QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++ -###Part 2 - Quality control -#print quality control graphs for each dataset before removing data due to contamination -#and before adjusting missing data to max theoretical values - -#plate analysis plot -#plate analysis is a quality check to identify plate effects containing anomalies - -Plate_Analysis_L <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication() - -Plate_Analysis_K <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication() - -Plate_Analysis_r <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_Publication() - -Plate_Analysis_AUC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication() - - - -Plate_Analysis_L_Box <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication() - -Plate_Analysis_K_Box <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication() - -Plate_Analysis_r_Box <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_Publication() - -Plate_Analysis_AUC_Box <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication() - -#quality control - values with a high delta background likely have heavy contamination -#check the frequency of these values -#report the L and K values of these spots -#report the number to be removed based on the Delta_Background_Tolerance -X$Delta_Backgrd <- X$LstBackgrd - X$X1stBackgrd - - -#raw l vs K before QC -Raw_l_vs_K_beforeQC <- ggplot(X,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + - ggtitle("Raw L vs K before QC") + - theme_Publication_legend_right() -pdf(paste(outputpath_QC,"Raw_L_vs_K_beforeQC.pdf",sep=""),width = 12,height = 8) -Raw_l_vs_K_beforeQC -dev.off() -pgg <- ggplotly(Raw_l_vs_K_beforeQC) -#pgg -plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_beforeQC.html",sep="") -saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - -#set delta background tolerance based on 3 sds from the mean delta background -Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(DelBGFactr*sd(X$Delta_Backgrd)) -#Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(3*sd(X$Delta_Backgrd)) -print(paste("Delta_Background_Tolerance is",Delta_Background_Tolerance,sep=" ")) - -Plate_Analysis_Delta_Backgrd <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2,position="jitter") + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_Box <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication() - - - -X_Delta_Backgrd_above_Tolerance <- X[X$Delta_Backgrd >= Delta_Background_Tolerance,] - -X_Delta_Backgrd_above_Tolerance_K_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$K,na.rm = TRUE))/2 -X_Delta_Backgrd_above_Tolerance_L_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$l,na.rm = TRUE))/2 -X_Delta_Backgrd_above_Tolerance_toRemove <- dim(X_Delta_Backgrd_above_Tolerance)[1] - -X_Delta_Backgrd_above_Tolerance_L_vs_K <- ggplot(X_Delta_Backgrd_above_Tolerance,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + - ggtitle(paste("Raw L vs K for strains above delta background threshold of",Delta_Background_Tolerance,"or above")) + - annotate("text",x=X_Delta_Backgrd_above_Tolerance_L_halfmedian,y=X_Delta_Backgrd_above_Tolerance_K_halfmedian, - label = paste("Strains above delta background tolerance = ",X_Delta_Backgrd_above_Tolerance_toRemove)) + - theme_Publication_legend_right() -pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.pdf",sep=""),width = 12,height = 8) -X_Delta_Backgrd_above_Tolerance_L_vs_K -dev.off() -pgg <- ggplotly(X_Delta_Backgrd_above_Tolerance_L_vs_K) -#pgg -plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.html",sep="") -saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - -#frequency plot for all data vs. the delta_background -DeltaBackground_Frequency_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_density() + - ggtitle("Density plot for Delta Background by Conc All Data") + theme_Publication_legend_right() - -#bar plot for all data vs. the delta_background -DeltaBackground_Bar_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_bar() + - ggtitle("Bar plot for Delta Background by Conc All Data") + theme_Publication_legend_right() - -pdf(file = paste(outputpath_QC,"Frequency_Delta_Background.pdf",sep=""),width = 12,height = 8) -print(DeltaBackground_Frequency_Plot) -print(DeltaBackground_Bar_Plot) -dev.off() - - -#Need to identify missing data, and differentiate between this data and removed data so the removed data can get set to NA and the missing data can get set to max theoretical values -#1 for missing data, 0 for non missing data -#Use "NG" for NoGrowth rather than "missing" -X$NG <- 0 -try(X[X$l == 0 & !is.na(X$l),]$NG <- 1) - -#1 for removed data, 0 non removed data -#Use DB to identify number of genes removed due to the DeltaBackground Threshold rather than "Removed" -X$DB <- 0 -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$DB <- 1) - -#replace the CPPs for l, r, AUC and K (must be last!) for removed data -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$l <- NA) -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$r <- NA) -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$AUC <- NA) -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$K <- NA) - - -Plate_Analysis_L_afterQC <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_afterQC <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_afterQC <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_afterQC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_afterQC <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - - -Plate_Analysis_L_Box_afterQC <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_Box_afterQC <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_Box_afterQC <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_Box_afterQC <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_Box_afterQC <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis.pdf",sep=""),width = 14,height=9) -Plate_Analysis_L -Plate_Analysis_L_afterQC -Plate_Analysis_K -Plate_Analysis_K_afterQC -Plate_Analysis_r -Plate_Analysis_r_afterQC -Plate_Analysis_AUC -Plate_Analysis_AUC_afterQC -Plate_Analysis_Delta_Backgrd -Plate_Analysis_Delta_Backgrd_afterQC -dev.off() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis_Boxplots.pdf",sep=""),width = 18,height=9) -Plate_Analysis_L_Box -Plate_Analysis_L_Box_afterQC -Plate_Analysis_K_Box -Plate_Analysis_K_Box_afterQC -Plate_Analysis_r_Box -Plate_Analysis_r_Box_afterQC -Plate_Analysis_AUC_Box -Plate_Analysis_AUC_Box_afterQC -Plate_Analysis_Delta_Backgrd_Box -Plate_Analysis_Delta_Backgrd_Box_afterQC -dev.off() - -#remove the zero values and print plate analysis -X_noZero <- X[which(X$l > 0),] -Plate_Analysis_L_afterQC_Z <- ggplot(X_noZero,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_afterQC_Z <- ggplot(X_noZero,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_afterQC_Z <- ggplot(X_noZero,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_afterQC_Z <- ggplot(X_noZero,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_afterQC_Z <- ggplot(X_noZero,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - - -Plate_Analysis_L_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros.pdf",sep=""),width = 14,height=9) -Plate_Analysis_L_afterQC_Z -Plate_Analysis_K_afterQC_Z -Plate_Analysis_r_afterQC_Z -Plate_Analysis_AUC_afterQC_Z -Plate_Analysis_Delta_Backgrd_afterQC_Z -dev.off() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros_Boxplots.pdf",sep=""),width = 18,height=9) -Plate_Analysis_L_Box_afterQC_Z -Plate_Analysis_K_Box_afterQC_Z -Plate_Analysis_r_Box_afterQC_Z -Plate_Analysis_AUC_Box_afterQC_Z -Plate_Analysis_Delta_Backgrd_Box_afterQC_Z -dev.off() - -#remove dataset with zeros removed -rm(X_noZero) - - -#X_test_missing_and_removed <- X[X$Removed == 1,] - -#calculate summary statistics for all strains, including both background and the deletions -X_stats_ALL <- ddply(X, c("Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_AUC = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - se_AUC = sd_AUC / sqrt(N-1) -) -#print(X_stats_ALL_L) - -write.csv(X_stats_ALL,file=paste(outputpath,"SummaryStats_ALLSTRAINS.csv"),row.names = FALSE) -#+++++END QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -##### Part 3 - Generate summary statistics and calculate the max theoretical L value -##### Calculate the Z score at each drug conc for each deletion strain - - -#get the background strains - can be modified to take another argument but for most screens will just be YDL227C -Background_Strains <- c("YDL227C") - -#first part of loop will go through for each background strain -#most cases there will only be one YDL227C -for(s in Background_Strains){ - X_Background <- X[X$OrfRep == s,] - - #if there's missing data for the background strains set these values to NA so the 0 values aren't included in summary statistics - #we may want to consider in some cases giving the max high value to L depending on the data type - if(table(X_Background$l)[1] == 0){ - X_Background[X_Background$l == 0,]$l <- NA - X_Background[X_Background$K == 0,]$K <- NA - X_Background[X_Background$r == 0,]$r <- NA - X_Background[X_Background$AUC == 0,]$AUC <- NA - } - - X_Background <- X_Background[!is.na(X_Background$l),] - - #get summary stats for L, K, R, AUC - X_stats_BY_L <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean = mean(l,na.rm=TRUE), - median = median(l,na.rm=TRUE), - max = max(l,na.rm=TRUE), - min = min(l,na.rm=TRUE), - sd = sd(l,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - print(X_stats_BY_L) - X1_SD <- max(X_stats_BY_L$sd) - - X_stats_BY_K <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(K)), - mean = mean(K,na.rm=TRUE), - median = median(K,na.rm=TRUE), - max = max(K,na.rm=TRUE), - min = min(K,na.rm=TRUE), - sd = sd(K,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - - X1_SD_K <- max(X_stats_BY_K$sd) - - - X_stats_BY_r <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = length(r), - mean = mean(r,na.rm=TRUE), - median = median(r,na.rm=TRUE), - max = max(r,na.rm=TRUE), - min = min(r,na.rm=TRUE), - sd = sd(r,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - - X1_SD_r <- max(X_stats_BY_r$sd) - - X_stats_BY_AUC <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = length(AUC), - mean = mean(AUC,na.rm=TRUE), - median = median(AUC,na.rm=TRUE), - max = max(AUC,na.rm=TRUE), - min = min(AUC,na.rm=TRUE), - sd = sd(AUC,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - - X1_SD_AUC <- max(X_stats_BY_AUC$sd) - - X_stats_BY <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_L = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - se_AUC = sd_AUC / sqrt(N-1) - ) - - write.csv(X_stats_BY,file=paste(outputpath,"SummaryStats_BackgroundStrains.csv"),row.names=FALSE) - - #calculate the max theoretical L values - #only look for max values when K is within 2SD of the ref strain - for(q in unique(X$Conc_Num_Factor)){ - if(q == 0){ - X_within_2SD_K <- X[X$Conc_Num_Factor == q,] - X_within_2SD_K <- X_within_2SD_K[!is.na(X_within_2SD_K$l),] - X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) ,] - X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - - X_outside_2SD_K <- X[X$Conc_Num_Factor == q,] - X_outside_2SD_K <- X_outside_2SD_K[!is.na(X_outside_2SD_K$l),] - #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - #X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - - } - if(q > 0){ - X_within_2SD_K_temp <- X[X$Conc_Num_Factor == q,] - X_within_2SD_K_temp <- X_within_2SD_K_temp[!is.na(X_within_2SD_K_temp$l),] - X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])),] - X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])),] - X_within_2SD_K <- rbind(X_within_2SD_K,X_within_2SD_K_temp) - - X_outside_2SD_K_temp <- X[X$Conc_Num_Factor == q,] - X_outside_2SD_K_temp <- X_outside_2SD_K_temp[!is.na(X_outside_2SD_K_temp$l),] - #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - #X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - X_outside_2SD_K <- rbind(X_outside_2SD_K,X_outside_2SD_K_temp) - } - } - - X_stats_BY_L_within_2SD_K <- ddply(X_within_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean = mean(l), - median = median(l), - max = max(l,na.rm=TRUE), - min = min(l,na.rm=TRUE), - sd = sd(l), - se = sd / sqrt(N-1), - z_max = (max-mean)/sd - ) - print(X_stats_BY_L_within_2SD_K) - X1_SD_within_2SD_K <- max(X_stats_BY_L_within_2SD_K$sd) - write.csv(X_stats_BY_L_within_2SD_K,file=paste(outputpath_QC,"Max_Observed_L_Vals_for_spots_within_2SD_K.csv",sep=""),row.names=FALSE) - - X_stats_BY_L_outside_2SD_K <- ddply(X_outside_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean = mean(l), - median = median(l), - max = max(l,na.rm=TRUE), - min = min(l,na.rm=TRUE), - sd = sd(l), - se = sd / sqrt(N-1) - ) - print(X_stats_BY_L_outside_2SD_K) - X1_SD_outside_2SD_K <- max(X_stats_BY_L_outside_2SD_K$sd) - - #X1_SD_outside_2SD_K <- X[X$l %in% X1_SD_within_2SD_K$l,] - Outside_2SD_K_L_vs_K <- ggplot(X_outside_2SD_K,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + - ggtitle("Raw L vs K for strains falling outside 2SD of the K mean at each conc") + theme_Publication_legend_right() - pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8) - print(Outside_2SD_K_L_vs_K) - dev.off() - pgg <- ggplotly(Outside_2SD_K_L_vs_K) - plotly_path <- paste(getwd(),"/",outputpath_QC,"RawL_vs_K_for_strains_outside_2SD_K.html",sep="") - saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - - Outside_2SD_K_delta_background_vs_K <- ggplot(X_outside_2SD_K,aes(Delta_Backgrd,K,color=as.factor(Conc_Num))) + geom_point(aes(l=l,ORF=ORF,Gene=Gene),shape=3,position="jitter") + - ggtitle("DeltaBackground vs K for strains falling outside 2SD of the K mean at each conc") + theme_Publication_legend_right() - pdf(paste(outputpath_QC,"DeltaBackground_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8) - Outside_2SD_K_delta_background_vs_K - dev.off() - pgg <- ggplotly(Outside_2SD_K_delta_background_vs_K) - #pgg - plotly_path <- paste(getwd(),"/",outputpath_QC,"DeltaBackground_vs_K_for_strains_outside_2SD_K.html",sep="") - saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - - #get the background strain mean values at the no drug conc to calculate shift - Background_L <- X_stats_BY_L$mean[1] - Background_K <- X_stats_BY_K$mean[1] - Background_r <- X_stats_BY_r$mean[1] - Background_AUC <- X_stats_BY_AUC$mean[1] - - #create empty plots for plotting element - p_l <- ggplot() - p_K <- ggplot() - p_r <- ggplot() - p_AUC <- ggplot() - - p_rf_l <- ggplot() - p_rf_K <- ggplot() - p_rf_r <- ggplot() - p_rf_AUC <- ggplot() - - #get only the deletion strains - X2 <- X - X2 <- X2[X2$OrfRep != "YDL227C",] - - #if set to max theoretical value, add a 1 to SM, if not, leave as 0 - #SM = Set to Max - X2$SM <- 0 - #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min - for(i in 1:length(unique(X2$Conc_Num))){ - Concentration <- unique(X2$Conc_Num)[i] - X2_temp <- X2[X2$Conc_Num == Concentration,] - if(Concentration == 0){ - X2_new <- X2_temp - print(paste("Check loop order, conc =",Concentration,sep=" ")) - } - if(Concentration > 0){ - try(X2_temp[X2_temp$l == 0 & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$SM <- 1) - try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - - #X2_temp[X2_temp$K == 0,]$K <- X_stats_ALL_K$max[i] - #X2_temp[X2_temp$r == 0,]$r <- X_stats_ALL_r$max[i] - #X2_temp[X2_temp$AUC == 0,]$AUC <- X_stats_ALL_AUC$max[i] - print(paste("Check loop order, conc =",Concentration,sep=" ")) - - X2_new <- rbind(X2_new,X2_temp) - - } - } - X2 <- X2_new - - - #get only the RF strains - X2_RF <- X - X2_RF <- X2_RF[X2_RF$OrfRep == "YDL227C",] - #if set to max theoretical value, add a 1 to SM, if not, leave as 0 - #SM = Set to Max - X2_RF$SM <- 0 - #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min - for(i in 1:length(unique(X2_RF$Conc_Num))){ - Concentration <- unique(X2_RF$Conc_Num)[i] - X2_RF_temp <- X2_RF[X2_RF$Conc_Num == Concentration,] - if(Concentration == 0){ - X2_RF_new <- X2_RF_temp - print(paste("Check loop order, conc =",Concentration,sep=" ")) - } - if(Concentration > 0){ - try(X2_RF_temp[X2_RF_temp$l == 0 & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$SM <- 1) - try(X2_RF_temp[X2_RF_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - print(paste("Check loop order, if error, refs have no L values outside theoretical max L, for REFs, conc =",Concentration,sep=" ")) - - X2_RF_new <- rbind(X2_RF_new,X2_RF_temp) - - } - } - X2_RF <- X2_RF_new - - - #######Part 4 Get the RF Z score values - #Change the OrfRep Column to include the RF strain, the Gene name and the Num. so each RF gets its own score - X2_RF$OrfRep <- paste(X2_RF$OrfRep,X2_RF$Gene,X2_RF$Num.,sep="_") - - num_genes_RF <- length(unique(X2_RF$OrfRep)) - print(num_genes_RF) - - - #create the output data.frame containing columns for each RF strain - InteractionScores_RF <- unique(X2_RF["OrfRep"]) - #InteractionScores_RF$Gene <- unique(X2$Gene) - InteractionScores_RF$Gene <- NA - InteractionScores_RF$Raw_Shift_L <- NA - InteractionScores_RF$Z_Shift_L <- NA - InteractionScores_RF$lm_Score_L <- NA - InteractionScores_RF$Z_lm_L <- NA - InteractionScores_RF$R_Squared_L <- NA - InteractionScores_RF$Sum_Z_Score_L <- NA - InteractionScores_RF$Avg_Zscore_L <- NA - InteractionScores_RF$Raw_Shift_K <- NA - InteractionScores_RF$Z_Shift_K <- NA - InteractionScores_RF$lm_Score_K <- NA - InteractionScores_RF$Z_lm_K <- NA - InteractionScores_RF$R_Squared_K <- NA - InteractionScores_RF$Sum_Z_Score_K <- NA - InteractionScores_RF$Avg_Zscore_K <- NA - InteractionScores_RF$Raw_Shift_r <- NA - InteractionScores_RF$Z_Shift_r <- NA - InteractionScores_RF$lm_Score_r <- NA - InteractionScores_RF$Z_lm_r <- NA - InteractionScores_RF$R_Squared_r <- NA - InteractionScores_RF$Sum_Z_Score_r <- NA - InteractionScores_RF$Avg_Zscore_r <- NA - InteractionScores_RF$Raw_Shift_AUC <- NA - InteractionScores_RF$Z_Shift_AUC <- NA - InteractionScores_RF$lm_Score_AUC <- NA - InteractionScores_RF$Z_lm_AUC <- NA - InteractionScores_RF$R_Squared_AUC <- NA - InteractionScores_RF$Sum_Z_Score_AUC <- NA - InteractionScores_RF$Avg_Zscore_AUC <- NA - InteractionScores_RF$NG <- NA - InteractionScores_RF$SM <- NA - - - for(i in 1:num_genes_RF){ - #get each deletion strain ORF - Gene_Sel <- unique(X2_RF$OrfRep)[i] - #extract only the current deletion strain and its data - X_Gene_Sel <- X2_RF[X2_RF$OrfRep == Gene_Sel,] - - X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm = TRUE), - median_L = median(l,na.rm = TRUE), - sd_L = sd(l,na.rm = TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm = TRUE), - median_K = median(K,na.rm = TRUE), - sd_K = sd(K,na.rm = TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm = TRUE), - median_r = median(r,na.rm = TRUE), - sd_r = sd(r,na.rm = TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm = TRUE), - median_AUC = median(AUC,na.rm = TRUE), - sd_AUC = sd(AUC,na.rm = TRUE), - se_AUC = sd_AUC / sqrt(N-1), - NG = sum(NG,na.rm=TRUE), - DB= sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - #Get shift vals - #if L is 0, that means the no growth on no drug - #if L is NA at 0, that means the spot was removed due to contamination - #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift - #otherwise calculate shift at no drug conc - if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){ - X_stats_interaction$Raw_Shift_L <- 0 - X_stats_interaction$Raw_Shift_K <- 0 - X_stats_interaction$Raw_Shift_r <- 0 - X_stats_interaction$Raw_Shift_AUC <- 0 - X_stats_interaction$Z_Shift_L <- 0 - X_stats_interaction$Z_Shift_K <- 0 - X_stats_interaction$Z_Shift_r <- 0 - X_stats_interaction$Z_Shift_AUC <- 0 - }else{ - X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L - X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K - X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r - X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC - X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1] - X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1] - X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1] - X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1] - } - - - #get WT vals - X_stats_interaction$WT_l <- X_stats_BY_L$mean - X_stats_interaction$WT_K <- X_stats_BY_K$mean - X_stats_interaction$WT_r <- X_stats_BY_r$mean - X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean - - #Get WT SD - X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd - X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd - X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd - X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd - - - #only get scores if there's growth at no drug - if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for no growth values in Z score calculation - if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #get linear model - gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction) - - #get interaction score calculated by linear model and R-squared value for the fit - gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] - r_squared_l <- summary(gene_lm_L)$r.squared - gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] - r_squared_K <- summary(gene_lm_K)$r.squared - gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] - r_squared_r <- summary(gene_lm_r)$r.squared - gene_interaction_AUC <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_AUC$coefficients[1] - r_squared_AUC <- summary(gene_lm_AUC)$r.squared - - #Get total of non removed values - Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1 - - #report the scores - InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] - InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] - - InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L - InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc) - - - InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K - InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc) - - InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r - InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1) - - InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC - InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1) - } - - if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for missing values in Z score calculatiom - if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #NA values for the next part since there's an NA or 0 at the no drug. - gene_lm_L <- NA - gene_lm_K <- NA - gene_lm_r <- NA - gene_lm_AUC <- NA - - gene_interaction_L <- NA - r_squared_l <- NA - gene_interaction_K <- NA - r_squared_K <- NA - gene_interaction_r <- NA - r_squared_r <- NA - gene_interaction_AUC <- NA - r_squared_AUC <- NA - - X_stats_interaction$Raw_Shift_L <- NA - X_stats_interaction$Raw_Shift_K <- NA - X_stats_interaction$Raw_Shift_r <- NA - X_stats_interaction$Raw_Shift_AUC <- NA - - X_stats_interaction$Z_Shift_L <- NA - X_stats_interaction$Z_Shift_K <- NA - X_stats_interaction$Z_Shift_r <- NA - X_stats_interaction$Z_Shift_AUC <- NA - - InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] - InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] - - InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L - InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - - InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K - InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - - InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r - InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - - InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC - InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - } - - if(i == 1){ - X_stats_interaction_ALL_RF <- X_stats_interaction - } - if(i > 1){ - X_stats_interaction_ALL_RF <- rbind(X_stats_interaction_ALL_RF,X_stats_interaction) - } - - InteractionScores_RF$NG[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE) - InteractionScores_RF$DB[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE) - InteractionScores_RF$SM[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE) - - #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int) - - - } - print("Pass RF Calculation loop") - - lm_sd_L <- sd(InteractionScores_RF$lm_Score_L,na.rm=TRUE) - lm_sd_K <- sd(InteractionScores_RF$lm_Score_K,na.rm=TRUE) - lm_sd_r <- sd(InteractionScores_RF$lm_Score_r,na.rm=TRUE) - lm_sd_AUC <- sd(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) - - lm_mean_L <- mean(InteractionScores_RF$lm_Score_L,na.rm=TRUE) - lm_mean_K <- mean(InteractionScores_RF$lm_Score_K,na.rm=TRUE) - lm_mean_r <- mean(InteractionScores_RF$lm_Score_r,na.rm=TRUE) - lm_mean_AUC <- mean(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) - - print(paste("Mean RF linear regression score L",lm_mean_L)) - - - InteractionScores_RF$Z_lm_L <- (InteractionScores_RF$lm_Score_L - lm_mean_L)/(lm_sd_L) - InteractionScores_RF$Z_lm_K <- (InteractionScores_RF$lm_Score_K - lm_mean_K)/(lm_sd_K) - InteractionScores_RF$Z_lm_r <- (InteractionScores_RF$lm_Score_r - lm_mean_r)/(lm_sd_r) - InteractionScores_RF$Z_lm_AUC <- (InteractionScores_RF$lm_Score_AUC - lm_mean_AUC)/(lm_sd_AUC) - - - InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$Z_lm_L,decreasing=TRUE),] - InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$NG,decreasing=TRUE),] - write.csv(InteractionScores_RF,paste(outputpath,"RF_ZScores_Interaction.csv",sep=""),row.names=FALSE) - - - for(i in 1:num_genes_RF){ - Gene_Sel <- unique(InteractionScores_RF$OrfRep)[i] - X_ZCalculations <- X_stats_interaction_ALL_RF[X_stats_interaction_ALL_RF$OrfRep == Gene_Sel,] - X_Int_Scores <- InteractionScores_RF[InteractionScores_RF$OrfRep == Gene_Sel,] - - p_rf_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg Zscore =",round(X_Int_Scores$Avg_Zscore_L,2))) + - annotate("text",x=1,y=25,label = paste("lm Zscore =",round(X_Int_Scores$Z_lm_L,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_rf_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) + - annotate("text",x=1,y=25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_L,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_rf_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-0.65,0.65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) + - annotate("text",x=1,y=0.25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_r,2))) + - annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) + - theme_Publication() - - p_rf_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-6500,6500)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) + - annotate("text",x=1,y=2500,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_AUC,2))) + - annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) + - theme_Publication() - - - - if(i == 1){ - X_stats_interaction_ALL_RF_final <- X_ZCalculations - } - if(i > 1){ - X_stats_interaction_ALL_RF_final <- rbind(X_stats_interaction_ALL_RF_final,X_ZCalculations) - } - } - print("Pass RF ggplot loop") - write.csv(X_stats_interaction_ALL_RF_final,paste(outputpath,"RF_ZScore_Calculations.csv",sep=""),row.names = FALSE) - - - ####### Part 5 - Get Zscores for Gene deletion strains - - - - #get total number of genes for the next loop - num_genes <- length(unique(X2$OrfRep)) - print(num_genes) - - #create the output data.frame containing columns for each deletion strain - InteractionScores <- unique(X2["OrfRep"]) - #InteractionScores$Gene <- unique(X2$Gene) - InteractionScores$Gene <- NA - InteractionScores$Raw_Shift_L <- NA - InteractionScores$Z_Shift_L <- NA - InteractionScores$lm_Score_L <- NA - InteractionScores$Z_lm_L <- NA - InteractionScores$R_Squared_L <- NA - InteractionScores$Sum_Z_Score_L <- NA - InteractionScores$Avg_Zscore_L <- NA - InteractionScores$Raw_Shift_K <- NA - InteractionScores$Z_Shift_K <- NA - InteractionScores$lm_Score_K <- NA - InteractionScores$Z_lm_K <- NA - InteractionScores$R_Squared_K <- NA - InteractionScores$Sum_Z_Score_K <- NA - InteractionScores$Avg_Zscore_K <- NA - InteractionScores$Raw_Shift_r <- NA - InteractionScores$Z_Shift_r <- NA - InteractionScores$lm_Score_r <- NA - InteractionScores$Z_lm_r <- NA - InteractionScores$R_Squared_r <- NA - InteractionScores$Sum_Z_Score_r <- NA - InteractionScores$Avg_Zscore_r <- NA - InteractionScores$Raw_Shift_AUC <- NA - InteractionScores$Z_Shift_AUC <- NA - InteractionScores$lm_Score_AUC <- NA - InteractionScores$Z_lm_AUC <- NA - InteractionScores$R_Squared_AUC <- NA - InteractionScores$Sum_Z_Score_AUC <- NA - InteractionScores$Avg_Zscore_AUC <- NA - InteractionScores$NG <- NA - InteractionScores$DB <- NA - InteractionScores$SM <- NA - - for(i in 1:num_genes){ - #get each deletion strain ORF - Gene_Sel <- unique(X2$OrfRep)[i] - #extract only the current deletion strain and its data - X_Gene_Sel <- X2[X2$OrfRep == Gene_Sel,] - - X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm = TRUE), - median_L = median(l,na.rm = TRUE), - sd_L = sd(l,na.rm = TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm = TRUE), - median_K = median(K,na.rm = TRUE), - sd_K = sd(K,na.rm = TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm = TRUE), - median_r = median(r,na.rm = TRUE), - sd_r = sd(r,na.rm = TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm = TRUE), - median_AUC = median(AUC,na.rm = TRUE), - sd_AUC = sd(AUC,na.rm = TRUE), - se_AUC = sd_AUC / sqrt(N-1), - NG = sum(NG,na.rm=TRUE), - DB= sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - #Get shift vals - #if L is 0, that means the no growth on no drug - #if L is NA at 0, that means the spot was removed due to contamination - #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift - #otherwise calculate shift at no drug conc - if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){ - X_stats_interaction$Raw_Shift_L <- 0 - X_stats_interaction$Raw_Shift_K <- 0 - X_stats_interaction$Raw_Shift_r <- 0 - X_stats_interaction$Raw_Shift_AUC <- 0 - X_stats_interaction$Z_Shift_L <- 0 - X_stats_interaction$Z_Shift_K <- 0 - X_stats_interaction$Z_Shift_r <- 0 - X_stats_interaction$Z_Shift_AUC <- 0 - }else{ - X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L - X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K - X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r - X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC - X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1] - X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1] - X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1] - X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1] - } - - - #get WT vals - X_stats_interaction$WT_l <- X_stats_BY_L$mean - X_stats_interaction$WT_K <- X_stats_BY_K$mean - X_stats_interaction$WT_r <- X_stats_BY_r$mean - X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean - - #Get WT SD - X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd - X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd - X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd - X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd - - - #only get scores if there's growth at no drug - if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for no growth values in Z score calculation - if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #get linear model - gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction) - - #get interaction score calculated by linear model and R-squared value for the fit - gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] - r_squared_l <- summary(gene_lm_L)$r.squared - gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] - r_squared_K <- summary(gene_lm_K)$r.squared - gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] - r_squared_r <- summary(gene_lm_r)$r.squared - gene_interaction_AUC <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_AUC$coefficients[1] - r_squared_AUC <- summary(gene_lm_AUC)$r.squared - - #Get total of non removed values - Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1 - - #report the scores - InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1]) - InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1]) - - InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_L - InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_L-lm_mean_L)/lm_sd_L - InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE) - InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc) - - - InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_K - InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_K-lm_mean_K)/lm_sd_K - InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE) - InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc) - - InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_r - InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_r-lm_mean_r)/lm_sd_r - InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE) - InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1) - - InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_AUC - InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_AUC-lm_mean_AUC)/lm_sd_AUC - InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE) - InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1) - } - - if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for missing values in Z score calculatiom - if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #NA values for the next part since there's an NA or 0 at the no drug. - gene_lm_L <- NA - gene_lm_K <- NA - gene_lm_r <- NA - - gene_interaction_L <- NA - r_squared_l <- NA - gene_interaction_K <- NA - r_squared_K <- NA - gene_interaction_r <- NA - r_squared_r <- NA - - X_stats_interaction$Raw_Shift_L <- NA - X_stats_interaction$Raw_Shift_K <- NA - X_stats_interaction$Raw_Shift_r <- NA - X_stats_interaction$Raw_Shift_AUC <- NA - - X_stats_interaction$Z_Shift_L <- NA - X_stats_interaction$Z_Shift_K <- NA - X_stats_interaction$Z_Shift_r <- NA - X_stats_interaction$Z_Shift_AUC <- NA - - InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1]) - InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1]) - - InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- NA - - InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <-NA - InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- NA - - InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- NA - - InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - } - - if(i == 1){ - X_stats_interaction_ALL <- X_stats_interaction - } - if(i > 1){ - X_stats_interaction_ALL <- rbind(X_stats_interaction_ALL,X_stats_interaction) - } - - InteractionScores$NG[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE) - InteractionScores$DB[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE) - InteractionScores$SM[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE) - - #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int) - - - } - print("Pass Int Calculation loop") - InteractionScores <- InteractionScores[order(InteractionScores$Z_lm_L,decreasing=TRUE),] - InteractionScores <- InteractionScores[order(InteractionScores$NG,decreasing=TRUE),] - df_order_by_OrfRep <- unique(InteractionScores$OrfRep) - #X_stats_interaction_ALL <- X_stats_interaction_ALL[order(X_stats_interaction_ALL$NG,decreasing=TRUE),] - write.csv(InteractionScores,paste(outputpath,"ZScores_Interaction.csv",sep=""),row.names=FALSE) - - InteractionScores_deletion_enhancers_L <- InteractionScores[InteractionScores$Avg_Zscore_L >= 2,] - InteractionScores_deletion_enhancers_K <- InteractionScores[InteractionScores$Avg_Zscore_K <= -2,] - InteractionScores_deletion_suppressors_L <- InteractionScores[InteractionScores$Avg_Zscore_L <= -2,] - InteractionScores_deletion_suppressors_K <- InteractionScores[InteractionScores$Avg_Zscore_K >= 2,] - InteractionScores_deletion_enhancers_and_Suppressors_L <- InteractionScores[InteractionScores$Avg_Zscore_L >= 2 | InteractionScores$Avg_Zscore_L <= -2,] - InteractionScores_deletion_enhancers_and_Suppressors_K <- InteractionScores[InteractionScores$Avg_Zscore_K >= 2 | InteractionScores$Avg_Zscore_K <= -2,] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- InteractionScores[InteractionScores$Z_lm_L >= 2 & InteractionScores$Avg_Zscore_L <= -2,] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- InteractionScores[InteractionScores$Z_lm_L <= -2 & InteractionScores$Avg_Zscore_L >= 2,] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- InteractionScores[InteractionScores$Z_lm_K <= -2 & InteractionScores$Avg_Zscore_K >= 2,] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- InteractionScores[InteractionScores$Z_lm_K >= 2 & InteractionScores$Avg_Zscore_K <= -2,] - - InteractionScores_deletion_enhancers_L <- InteractionScores_deletion_enhancers_L[!is.na(InteractionScores_deletion_enhancers_L$OrfRep),] - InteractionScores_deletion_enhancers_K <- InteractionScores_deletion_enhancers_K[!is.na(InteractionScores_deletion_enhancers_K$OrfRep),] - InteractionScores_deletion_suppressors_L <- InteractionScores_deletion_suppressors_L[!is.na(InteractionScores_deletion_suppressors_L$OrfRep),] - InteractionScores_deletion_suppressors_K <- InteractionScores_deletion_suppressors_K[!is.na(InteractionScores_deletion_suppressors_K$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_L <- InteractionScores_deletion_enhancers_and_Suppressors_L[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_K <- InteractionScores_deletion_enhancers_and_Suppressors_K[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K$OrfRep),] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L[!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L$OrfRep),] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L[!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L$OrfRep),] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K[!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K$OrfRep),] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K[!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K$OrfRep),] - - write.csv(InteractionScores_deletion_enhancers_L,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_K,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_L,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_K,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L,paste(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L,paste(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K,paste(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K,paste(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv",sep=""),row.names=FALSE) - - #get enhancers and suppressors for linear regression - InteractionScores_deletion_enhancers_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2,] - InteractionScores_deletion_enhancers_K_lm <- InteractionScores[InteractionScores$Z_lm_K <= -2,] - InteractionScores_deletion_suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L <= -2,] - InteractionScores_deletion_suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2,] - InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2 | InteractionScores$Z_lm_L <= -2,] - InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2 | InteractionScores$Z_lm_K <= -2,] - - InteractionScores_deletion_enhancers_L_lm <- InteractionScores_deletion_enhancers_L_lm[!is.na(InteractionScores_deletion_enhancers_L_lm$OrfRep),] - InteractionScores_deletion_enhancers_K_lm <- InteractionScores_deletion_enhancers_K_lm[!is.na(InteractionScores_deletion_enhancers_K_lm$OrfRep),] - InteractionScores_deletion_suppressors_L_lm <- InteractionScores_deletion_suppressors_L_lm[!is.na(InteractionScores_deletion_suppressors_L_lm$OrfRep),] - InteractionScores_deletion_suppressors_K_lm <- InteractionScores_deletion_suppressors_K_lm[!is.na(InteractionScores_deletion_suppressors_K_lm$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores_deletion_enhancers_and_Suppressors_L_lm[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L_lm$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores_deletion_enhancers_and_Suppressors_K_lm[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K_lm$OrfRep),] - - write.csv(InteractionScores_deletion_enhancers_L_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_L_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_K_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_K_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_L_lm,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_L_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_K_lm,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_K_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv",sep=""),row.names=FALSE) - - - write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) - print('ln 1570 write StudyInfo.csv after ') - #write.table(Labels,file=paste("../Code/StudyInfo.txt"),sep="\t",row.names = FALSE) - - for(i in 1:num_genes){ - Gene_Sel <- unique(InteractionScores$OrfRep)[i] - X_ZCalculations <- X_stats_interaction_ALL[X_stats_interaction_ALL$OrfRep == Gene_Sel,] - X_Int_Scores <- InteractionScores[InteractionScores$OrfRep == Gene_Sel,] - - p_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_L,2))) + - annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_L,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) + - annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_K,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-0.65,0.65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) + - annotate("text",x=1,y=0.25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_r,2))) + - annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) + - theme_Publication() - - p_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-6500,6500)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) + - annotate("text",x=1,y=2500,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_AUC,2))) + - annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) + - theme_Publication() - - - - if(i == 1){ - X_stats_interaction_ALL_final <- X_ZCalculations - } - if(i > 1){ - X_stats_interaction_ALL_final <- rbind(X_stats_interaction_ALL_final,X_ZCalculations) - } - } - print("Pass Int ggplot loop") - write.csv(X_stats_interaction_ALL_final,paste(outputpath,"ZScore_Calculations.csv",sep=""),row.names = FALSE) - - - - - - - Blank <- ggplot(X2_RF) + geom_blank() - - pdf(paste(outputpath,"InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE) - - X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise, - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_AUC = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - NG = sum(NG,na.rm=TRUE), - DB = sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - L_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,l)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + - annotate("text",x=-0.25,y=10,label="NG") + - annotate("text",x=-0.25,y=5,label="DB") + - annotate("text",x=-0.25,y=0,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=0,label=X_stats_X2_RF$SM) + - theme_Publication() - - K_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,K)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(-20,160)) + - annotate("text",x=-0.25,y=-5,label="NG") + - annotate("text",x=-0.25,y=-12.5,label="DB") + - annotate("text",x=-0.25,y=-20,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-5,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-12.5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-20,label=X_stats_X2_RF$SM) + - theme_Publication() - - R_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,r)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - annotate("text",x=-0.25,y=.9,label="NG") + - annotate("text",x=-0.25,y=.8,label="DB") + - annotate("text",x=-0.25,y=.7,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.9,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.8,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.7,label=X_stats_X2_RF$SM) + - theme_Publication() - - AUC_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,AUC)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + - annotate("text",x=-0.25,y=11000,label="NG") + - annotate("text",x=-0.25,y=10000,label="DB") + - annotate("text",x=-0.25,y=9000,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=11000,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10000,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=9000,label=X_stats_X2_RF$SM) + - theme_Publication() - - - L_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + - theme_Publication() - - K_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + - theme_Publication() - - r_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - theme_Publication() - - AUC_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + - theme_Publication() - - - grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2) - grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2) - - - - #plot the references - #grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4) - #grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2) - - #loop for grid arrange 4x3 - j <- rep(1,((num_genes)/3)-1) - for(n in 1:length(j)){ - j[n+1] <- n*3 + 1 - } - #loop for printing each plot - num <- 0 - for(m in 1:(round((num_genes)/3)-1)){ - num <- j[m] - grid.arrange(p_l[[num]],p_K[[num]],p_r[[num]],p_AUC[[num]],p_l[[num+1]],p_K[[num+1]],p_r[[num+1]],p_AUC[[num+1]],p_l[[num+2]],p_K[[num+2]],p_r[[num+2]],p_AUC[[num+2]],ncol=4,nrow=3) - #grid.arrange(p_l[[364]],p_K[[364]],p_r[[364]],p_l[[365]],p_K[[365]],p_r[[365]],p_l[[366]],p_K[[366]],p_r[[366]],ncol=3,nrow=3) - - - #p1[[num+3]],p_K[[num+3]],p_r[[num+3]],p1[[num+4]],p_K[[num+4]],p_r[[num+4]] - } - if(num_genes != (num+2)){ - total_num = num_genes - (num+2) - if(total_num == 5){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_AUC[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],p_AUC[[num+7]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - if(total_num == 4){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_AUC[[num+6]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - - if(total_num == 3){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3) - #grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 2){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 1){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - } - dev.off() - - - - pdf(paste(outputpath,"RF_InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE) - - X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise, - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_AUC = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - NG = sum(NG,na.rm=TRUE), - DB = sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - L_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,l)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + - annotate("text",x=-0.25,y=10,label="NG") + - annotate("text",x=-0.25,y=5,label="DB") + - annotate("text",x=-0.25,y=0,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=0,label=X_stats_X2_RF$SM) + - theme_Publication() - - K_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,K)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(-20,160)) + - annotate("text",x=-0.25,y=-5,label="NG") + - annotate("text",x=-0.25,y=-12.5,label="DB") + - annotate("text",x=-0.25,y=-20,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-5,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-12.5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-20,label=X_stats_X2_RF$SM) + - theme_Publication() - - R_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,r)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - annotate("text",x=-0.25,y=.9,label="NG") + - annotate("text",x=-0.25,y=.8,label="DB") + - annotate("text",x=-0.25,y=.7,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.9,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.8,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.7,label=X_stats_X2_RF$SM) + - theme_Publication() - - AUC_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,AUC)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + - annotate("text",x=-0.25,y=11000,label="NG") + - annotate("text",x=-0.25,y=10000,label="DB") + - annotate("text",x=-0.25,y=9000,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=11000,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10000,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=9000,label=X_stats_X2_RF$SM) + - theme_Publication() - - - L_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + - theme_Publication() - - K_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + - theme_Publication() - - r_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - theme_Publication() - - AUC_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(12000,0)) + - theme_Publication() - - - grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2) - grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2) - - - - #plot the references - #grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4) - #grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2) - - #loop for grid arrange 4x3 - j <- rep(1,((num_genes_RF)/3)-1) - for(n in 1:length(j)){ - j[n+1] <- n*3 + 1 - } - #loop for printing each plot - num <- 0 - for(m in 1:(round((num_genes_RF)/3)-1)){ - num <- j[m] - grid.arrange(p_rf_l[[num]],p_rf_K[[num]],p_rf_r[[num]],p_rf_AUC[[num]],p_rf_l[[num+1]],p_rf_K[[num+1]],p_rf_r[[num+1]],p_rf_AUC[[num+1]],p_rf_l[[num+2]],p_rf_K[[num+2]],p_rf_r[[num+2]],p_rf_AUC[[num+2]],ncol=4,nrow=3) - #grid.arrange(p_rf_l[[364]],p_rf_K[[364]],p_rf_r[[364]],p_rf_l[[365]],p_rf_K[[365]],p_rf_r[[365]],p_rf_l[[366]],p_rf_K[[366]],p_rf_r[[366]],ncol=3,nrow=3) - - - #p1[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p1[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]] - } - if(num_genes_RF != (num+2)){ - total_num = num_genes_RF - (num+2) - if(total_num == 5){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_AUC[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],p_rf_AUC[[num+7]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - if(total_num == 4){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_AUC[[num+6]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - - if(total_num == 3){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3) - #grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 2){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 1){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - } - dev.off() - - #print rank plots for L and K gene interactions - - - InteractionScores_AdjustMissing <- InteractionScores - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_L),]$Avg_Zscore_L <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_K),]$Avg_Zscore_K <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_r),]$Avg_Zscore_r <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_AUC),]$Avg_Zscore_AUC <- 0.001 - - InteractionScores_AdjustMissing$L_Rank <- NA - InteractionScores_AdjustMissing$K_Rank <- NA - InteractionScores_AdjustMissing$r_Rank <- NA - InteractionScores_AdjustMissing$AUC_Rank <- NA - - InteractionScores_AdjustMissing$L_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_L) - InteractionScores_AdjustMissing$K_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_K) - InteractionScores_AdjustMissing$r_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_r) - InteractionScores_AdjustMissing$AUC_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_AUC) - - # - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_L),]$Z_lm_L <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_K),]$Z_lm_K <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_r),]$Z_lm_r <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_AUC),]$Z_lm_AUC <- 0.001 - - InteractionScores_AdjustMissing$L_Rank_lm <- NA - InteractionScores_AdjustMissing$K_Rank_lm <- NA - InteractionScores_AdjustMissing$r_Rank_lm <- NA - InteractionScores_AdjustMissing$AUC_Rank_lm <- NA - - InteractionScores_AdjustMissing$L_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_L) - InteractionScores_AdjustMissing$K_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_K) - InteractionScores_AdjustMissing$r_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_r) - InteractionScores_AdjustMissing$AUC_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_AUC) - - - - Rank_L_1SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - pdf(paste(outputpath,"RankPlots.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2) - - dev.off() - - - Rank_L_1SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - pdf(paste(outputpath,"RankPlots_lm.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2) - - dev.off() - - - - X_NArm <- InteractionScores[!is.na(InteractionScores$Z_lm_L) | !is.na(InteractionScores$Avg_Zscore_L) ,] - - #find overlaps - X_NArm$Overlap <- "No Effect" - try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Enhancer Both") - try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Suppressor Both") - try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= 2,]$Overlap <- "Deletion Enhancer lm only") - try(X_NArm[X_NArm$Z_lm_L <= 2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Enhancer Avg Zscore only") - try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= -2,]$Overlap <- "Deletion Suppressor lm only") - try(X_NArm[X_NArm$Z_lm_L >= -2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Suppressor Avg Zscore only") - try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Enhancer lm, Deletion Suppressor Avg Z score") - try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Suppressor lm, Deletion Enhancer Avg Z score") - - #get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 - get_lm_L <- lm(X_NArm$Z_lm_L~X_NArm$Avg_Zscore_L) - L_lm <- summary(get_lm_L) - - get_lm_K <- lm(X_NArm$Z_lm_K~X_NArm$Avg_Zscore_K) - K_lm <- summary(get_lm_K) - - get_lm_r <- lm(X_NArm$Z_lm_r~X_NArm$Avg_Zscore_r) - r_lm <- summary(get_lm_r) - - get_lm_AUC <- lm(X_NArm$Z_lm_AUC~X_NArm$Avg_Zscore_AUC) - AUC_lm <- summary(get_lm_AUC) - - pdf(paste(outputpath,"Avg_Zscore_vs_lm_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) - - print(ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm L") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(Avg_Zscore_K,Z_lm_K)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm K") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(K_lm$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(Avg_Zscore_r,Z_lm_r)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm r") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(r_lm$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(Avg_Zscore_AUC,Z_lm_AUC)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm AUC") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(AUC_lm$r.squared,2))) + theme_Publication_legend_right()) - - dev.off() - - - lm_v_Zscore_L <- ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L,ORF=OrfRep,Gene=Gene,NG=NG,SM=SM,DB=DB)) + geom_point(aes(color=Overlap),shape=3) + - geom_smooth(method = "lm",color=1) + ggtitle("Avg Zscore vs lm L") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right() - - pgg <- ggplotly(lm_v_Zscore_L) - #pgg - plotly_path <- paste(getwd(),"/",outputpath,"Avg_Zscore_vs_lm_NA_rm.html",sep="") - saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - X_NArm$L_Rank <- rank(X_NArm$Avg_Zscore_L) - X_NArm$K_Rank <- rank(X_NArm$Avg_Zscore_K) - X_NArm$r_Rank <- rank(X_NArm$Avg_Zscore_r) - X_NArm$AUC_Rank <- rank(X_NArm$Avg_Zscore_AUC) - - X_NArm$L_Rank_lm <- rank(X_NArm$Z_lm_L) - X_NArm$K_Rank_lm <- rank(X_NArm$Z_lm_K) - X_NArm$r_Rank_lm <- rank(X_NArm$Z_lm_r) - X_NArm$AUC_Rank_lm <- rank(X_NArm$Z_lm_AUC) - - #get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 - get_lm_L2 <- lm(X_NArm$L_Rank_lm~X_NArm$L_Rank) - L_lm2 <- summary(get_lm_L2) - - get_lm_K2 <- lm(X_NArm$K_Rank_lm~X_NArm$K_Rank) - K_lm2 <- summary(get_lm_K2) - - get_lm_r2 <- lm(X_NArm$r_Rank_lm~X_NArm$r_Rank) - r_lm2 <- summary(get_lm_r2) - - get_lm_AUC2 <- lm(X_NArm$AUC_Rank_lm~X_NArm$AUC_Rank) - AUC_lm2 <- summary(get_lm_AUC2) - - num_genes_NArm2 <- (dim(X_NArm)[1])/2 - - pdf(paste(outputpath,"Avg_Zscore_vs_lm_ranked_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) - - print(ggplot(X_NArm,aes(L_Rank,L_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank Avg Zscore vs lm L") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(L_lm2$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(K_Rank,K_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank Avg Zscore vs lm K") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(K_lm2$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(r_Rank,r_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank Avg Zscore vs lm r") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(r_lm2$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(AUC_Rank,AUC_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank of Avg Zscore vs lm AUC") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(AUC_lm2$r.squared,2))) + theme_Publication_legend_right()) - - - - dev.off() - - - - Rank_L_1SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - pdf(paste(outputpath,"RankPlots_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2) - - dev.off() - - - Rank_L_1SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - pdf(paste(outputpath,"RankPlots_lm_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2) - - dev.off() - -} - - - -#get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 -get_lm_1 <- lm(X_NArm$Z_lm_K~X_NArm$Z_lm_L) -L_lm_1 <- summary(get_lm_1) - -get_lm_2 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_L) -L_lm_2 <- summary(get_lm_2) - -get_lm_3 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_L) -L_lm_3 <- summary(get_lm_3) - -get_lm_4 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_K) -L_lm_4 <- summary(get_lm_4) - -get_lm_5 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_K) -L_lm_5 <- summary(get_lm_5) - -get_lm_6 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_r) -L_lm_6 <- summary(get_lm_6) - - -pdf(file=paste(outputpath,"Correlation_CPPs.pdf",sep=""),width = 10, height = 7, onefile = TRUE) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_K)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction L vs. Interaction K") + - xlab("z-score L") + ylab("z-score K") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_1$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction L vs. Interaction r") + - xlab("z-score L") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_2$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction L vs. Interaction AUC") + - xlab("z-score L") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_3$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction K vs. Interaction r") + - xlab("z-score K") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_4$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction K vs. Interaction AUC") + - xlab("z-score K") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_5$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_r,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction r vs. Interaction AUC") + - xlab("z-score r") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_6$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -InteractionScores_RF2 <- InteractionScores_RF[!is.na(InteractionScores_RF$Z_lm_L),] -ggplot(X_NArm,aes(Z_lm_L,Z_lm_K)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_K),color="cyan") + - ggtitle("Interaction L vs. Interaction K") + - xlab("z-score L") + ylab("z-score K") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_1$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_r),color="cyan") + - ggtitle("Interaction L vs. Interaction r") + - xlab("z-score L") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_2$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_AUC),color="cyan") + - ggtitle("Interaction L vs. Interaction AUC") + - xlab("z-score L") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_3$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_K,Z_lm_r),color="cyan") + - ggtitle("Interaction K vs. Interaction r") + - xlab("z-score K") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_4$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_K,Z_lm_AUC),color="cyan") + - ggtitle("Interaction K vs. Interaction AUC") + - xlab("z-score K") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_5$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_r,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_r,Z_lm_AUC),color="cyan") + - ggtitle("Interaction r vs. Interaction AUC") + - xlab("z-score r") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_6$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - - - - - -dev.off() - - -#write.csv(Labels,file=paste("../Code/Parameters.csv"),row.names = FALSE) -timestamp() - -#BoneYard*********************************************** -#I'm thinking this parameter needs to be save somewhere "permanent' for the record so outputs can be reproduced. -#take this out of the Arguments. In Matlab I could for future in .mat file. Maybe I could save the SD Args[2] as part of the StudyInfo.txt. -#Corruptable but better than nothing. -#if(is.na(Args[2])){ -# std=3 -#}else { -# std= Arg[2] -#Delta_Background_sdFactor <- 2 #Args[3] -#DelBGFactr <- as.numeric(Delta_Background_sdFactor) -#} - diff --git a/workflow/templates/qhtcp/Exp3/backups/InteractTemplateB4Prompt4SDinput.R b/workflow/templates/qhtcp/Exp3/backups/InteractTemplateB4Prompt4SDinput.R deleted file mode 100644 index 3b11777c..00000000 --- a/workflow/templates/qhtcp/Exp3/backups/InteractTemplateB4Prompt4SDinput.R +++ /dev/null @@ -1,2702 +0,0 @@ -#Based on InteractionTemplate.R which is based on Sean Santose's Interaction_V5 script. -#Adapt SS For Structured Data storage but using command line scripts -###Set up the required libraries, call required plot theme elements and set up the command line arguments -library("ggplot2") -library("plyr") -library("extrafont") -library("gridExtra") -library("gplots") -library("RColorBrewer") -library("stringr") -#library("gdata") -library(plotly) -library(htmlwidgets) - -Args <- commandArgs(TRUE) -input_file <- Args[1] #"!!Results_17_0827_yor1null-rpl12anull misLabeledAsFrom MI 17_0919_yor1-curated.txt" #Args[1] #Arg 1 #"!!ResultsStd_JS 19_1224_HLEG_P53.txt" is the !!results ... .txt - -#Path to Output Directory -W=getwd() #R is F'd up, Can't use, Any legitamate platfold could build out dirs from this -outDir <- "ZScores/" #"Args[2] #paste0(W,"/ZScores/") -subDir <- outDir #Args[2] - -if (file.exists(subDir)){ - outputpath <- subDir -} else { - dir.create(file.path(subDir)) -} - -if (file.exists(paste(subDir,"QC/",sep=""))){ - outputpath_QC <- paste(subDir,"QC/",sep="") -} else { - dir.create(file.path(paste(subDir,"QC/",sep=""))) - outputpath_QC <- paste(subDir,"QC/",sep="") -} -#define the output path (formerly the second argument from Rscript) -outputpath <- outDir - -#Set Args[2] the Background contamination noise filter as a function of standard deviation -#Sean recommends 3 or 5 SD factor. -#Capture Exp_ number,use it to Save Args[2]{std}to Labels field and then Write to Labels to studyInfo.txt for future reference -Labels <- read.csv(file= "../Code/StudyInfo.csv",stringsAsFactors = FALSE) #,sep= ",") -print("Be sure to include Argument 2 the Bacground noise filter standard deviation i.e., 3 or 5 per Sean") -std= as.numeric(Args[2]) -expNumber<- as.numeric(sub("^.*?(\\d+)$", "\\1", getwd())) -Labels[expNumber,3]= as.numeric(std) -Delta_Background_sdFactor <- std -DelBGFactr <- as.numeric(Delta_Background_sdFactor) -#Write Background SD value to studyInfo.txt file -#write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) -write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) -print('ln 50 write StudyInfo.csv ') -#write.table(Labels,file=paste(outputpath,"StudyInfo.txt"),sep = "\t",row.names = FALSE) - -#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -#++++++BEGIN USER DATA SELECTION SECTION+++++++++++++++++++++++++++++++++++++++++++++++++ - - -#read in the data -X <- read.delim(input_file,skip=2,as.is=T,row.names=1,strip.white=TRUE) -X <- X[!(X[[1]]%in%c("","Scan")),] -#X <- X[!(X[[1]]%in%c(61:76)),] #Remove dAmp plates which are Scans 61 thru 76 - -#X <- X[which(X$Specifics == "WT"),] - -X_length <- length(X[1,]) -X_end <- length(X[1,]) - 2 -X <- X[,c(1:46,X_end:X_length)] - - -#use numeric data to perform operations -X$Col <- as.numeric(X$Col) -X$Row <- as.numeric(X$Row) -X$l <- as.numeric(X$l) -X$K <- as.numeric(X$K) -X$r <- as.numeric(X$r) -X$Scan <- as.numeric(X$Scan) -X$AUC <- as.numeric(X$AUC) -X$LstBackgrd <- as.numeric(X$LstBackgrd) -X$X1stBackgrd <- as.numeric(X$X1stBackgrd) - -#set the OrfRep to YDL227C for the ref data -X[X$ORF == "YDL227C",]$OrfRep <- "YDL227C" -#Sean removes the Doxycyclin at 0.0ug.mL so that only the Oligomycin series with Doxycyclin of 0.12ug/mL are used. -#That is the first DM plates are removed from the data set with the following. -X <- X[X$Conc1 != "0ug/mL",] #This occurs only for Exp1 and Exp2 and so doesn't have any effect on Exp3&4 - - -#Mert placed the"bad_spot" text in the ORF col. for particular spots in the RF1 and RF2 plates. -#This code removes those spots from the data set used for the interaction analysis. Dr.Hartman feels that these donot effect Zscores significantly and so "non-currated" files were used. -#try(X <- X[X$ORF != "bad_spot",]) -#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++= - -#get total number of drug concentrations -Total_Conc_Nums <- length(unique(X$Conc)) - -#function to ID numbers in string with characters+numbers (ie to get numeric drug conc) -numextract <- function(string){ - str_extract(string, "\\-*\\d+\\.*\\d*") -} - -#generate a new column with the numeric drug concs -X$Conc_Num <- as.numeric(numextract(X$Conc)) -#Generate new column with the numeric drug concs as factors starting at 0 for the graphing later -X$Conc_Num_Factor <- as.numeric(as.factor(X$Conc_Num)) - 1 - -#Get the max factor for concentration -MAX_CONC <- max(X$Conc_Num_Factor) -#if treating numbers not as factors uncomment next line and comment out previous line -#MAX_CONC <- max(X$Conc_Num) - -#remove wells with problems for making graphs and to not include in summary statistics -X <- X[X$Gene != "BLANK",] -X <- X[X$Gene != "Blank",] -X <- X[X$ORF != "Blank",] -X <- X[X$Gene != "blank",] -#X <- X[X$Gene != "HO",] -Xbu= X -#Inserted to use SGDgenelist to update orfs and replace empty geneName cells with ORF name (adapted from Sean's Merge script). This is to 'fix' the naming for everything that follows (REMc, Heatmaps ... et.al) rather than do it piece meal later -#Sean's Match Script( which was adapted here) was fixed 2022_0608 so as not to write over the RF1&RF2 geneNames which caused a variance with his code results -#in the Z_lm_L,K,r&AUC output values. Values correlated well but were off by a multiplier factor. -SGDgeneList= "../Code/SGD_features.tab" -genes = data.frame(read.delim(file=SGDgeneList,quote="",header=FALSE,colClasses = c(rep("NULL",3), rep("character", 2), rep("NULL", 11)))) -for(i in 1:length(X[,14])){ - ii= as.numeric(i) - line_num = match(X[ii,14],genes[,1],nomatch=1) - OrfRepColNum= as.numeric(match('OrfRep',names(X))) - if(X[ii,OrfRepColNum]!= "YDL227C"){ - X[ii,15] = genes[line_num,2] - } - if((X[ii,15] == "")||(X[ii,15] == "OCT1")){ - X[ii,15] = X[ii,OrfRepColNum] - } -} -Xblankreplace= X -#X= Xbu #for restore testing restore X if geneName 'Match' routine needs changing - -#Remove dAmPs ******************************* -DAmPs_List <- "../Code/22_0602_Remy_DAmPsList.txt" -Damps <- read.delim(DAmPs_List,header=F) - -X <- X[!(X$ORF %in% Damps$V1),] #fix this to Damps[,1] -XafterDampsRM=X #Backup for debugging especially when Rstudio goes crazy out of control -# *********** - - -#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -#++++++END USER DATA SELECTION+++++++++++++++++++++++++++++++++++++++++++++++++ -#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -print("ln137 End of User Section including blank gene writeOver") -#++++Begin Graphics Boiler Plate Section+++++++++++++++++++++++++++++++++++++++ -#theme elements for plots -theme_Publication <- function(base_size=14, base_family="sans") { - library(grid) - library(ggthemes) - (theme_foundation(base_size=base_size, base_family=base_family) - + theme(plot.title = element_text(face = "bold", - size = rel(1.2), hjust = 0.5), - text = element_text(), - panel.background = element_rect(colour = NA), - plot.background = element_rect(colour = NA), - panel.border = element_rect(colour = NA), - axis.title = element_text(face = "bold",size = rel(1)), - axis.title.y = element_text(angle=90,vjust =2), - axis.title.x = element_text(vjust = -0.2), - axis.text = element_text(), - axis.line = element_line(colour="black"), - axis.ticks = element_line(), - panel.grid.major = element_line(colour="#f0f0f0"), - panel.grid.minor = element_blank(), - legend.key = element_rect(colour = NA), - legend.position = "bottom", - legend.direction = "horizontal", - legend.key.size= unit(0.2, "cm"), - legend.spacing = unit(0, "cm"), - legend.title = element_text(face="italic"), - plot.margin=unit(c(10,5,5,5),"mm"), - strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"), - strip.text = element_text(face="bold") - )) - -} - -scale_fill_Publication <- function(...){ - library(scales) - discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - -} - -scale_colour_Publication <- function(...){ - discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - -} - - -theme_Publication_legend_right <- function(base_size=14, base_family="sans") { - (theme_foundation(base_size=base_size, base_family=base_family) - + theme(plot.title = element_text(face = "bold", - size = rel(1.2), hjust = 0.5), - text = element_text(), - panel.background = element_rect(colour = NA), - plot.background = element_rect(colour = NA), - panel.border = element_rect(colour = NA), - axis.title = element_text(face = "bold",size = rel(1)), - axis.title.y = element_text(angle=90,vjust =2), - axis.title.x = element_text(vjust = -0.2), - axis.text = element_text(), - axis.line = element_line(colour="black"), - axis.ticks = element_line(), - panel.grid.major = element_line(colour="#f0f0f0"), - panel.grid.minor = element_blank(), - legend.key = element_rect(colour = NA), - legend.position = "right", - legend.direction = "vertical", - legend.key.size= unit(0.5, "cm"), - legend.spacing = unit(0, "cm"), - legend.title = element_text(face="italic"), - plot.margin=unit(c(10,5,5,5),"mm"), - strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"), - strip.text = element_text(face="bold") - )) - -} - -scale_fill_Publication <- function(...){ - discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - -} - -scale_colour_Publication <- function(...){ - discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - -} - - -#print timestamp for initial time the code starts -timestamp() -#+++++BEGIN QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++ -###Part 2 - Quality control -#print quality control graphs for each dataset before removing data due to contamination -#and before adjusting missing data to max theoretical values - -#plate analysis plot -#plate analysis is a quality check to identify plate effects containing anomalies - -Plate_Analysis_L <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication() - -Plate_Analysis_K <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication() - -Plate_Analysis_r <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_Publication() - -Plate_Analysis_AUC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication() - - - -Plate_Analysis_L_Box <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication() - -Plate_Analysis_K_Box <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication() - -Plate_Analysis_r_Box <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_Publication() - -Plate_Analysis_AUC_Box <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication() - -#quality control - values with a high delta background likely have heavy contamination -#check the frequency of these values -#report the L and K values of these spots -#report the number to be removed based on the Delta_Background_Tolerance -X$Delta_Backgrd <- X$LstBackgrd - X$X1stBackgrd - - -#raw l vs K before QC -Raw_l_vs_K_beforeQC <- ggplot(X,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + - ggtitle("Raw L vs K before QC") + - theme_Publication_legend_right() -pdf(paste(outputpath_QC,"Raw_L_vs_K_beforeQC.pdf",sep=""),width = 12,height = 8) -Raw_l_vs_K_beforeQC -dev.off() -pgg <- ggplotly(Raw_l_vs_K_beforeQC) -#pgg -plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_beforeQC.html",sep="") -saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - -#set delta background tolerance based on 3 sds from the mean delta background -Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(DelBGFactr*sd(X$Delta_Backgrd)) -#Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(3*sd(X$Delta_Backgrd)) -print(paste("Delta_Background_Tolerance is",Delta_Background_Tolerance,sep=" ")) - -Plate_Analysis_Delta_Backgrd <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2,position="jitter") + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_Box <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication() - - - -X_Delta_Backgrd_above_Tolerance <- X[X$Delta_Backgrd >= Delta_Background_Tolerance,] - -X_Delta_Backgrd_above_Tolerance_K_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$K,na.rm = TRUE))/2 -X_Delta_Backgrd_above_Tolerance_L_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$l,na.rm = TRUE))/2 -X_Delta_Backgrd_above_Tolerance_toRemove <- dim(X_Delta_Backgrd_above_Tolerance)[1] - -X_Delta_Backgrd_above_Tolerance_L_vs_K <- ggplot(X_Delta_Backgrd_above_Tolerance,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + - ggtitle(paste("Raw L vs K for strains above delta background threshold of",Delta_Background_Tolerance,"or above")) + - annotate("text",x=X_Delta_Backgrd_above_Tolerance_L_halfmedian,y=X_Delta_Backgrd_above_Tolerance_K_halfmedian, - label = paste("Strains above delta background tolerance = ",X_Delta_Backgrd_above_Tolerance_toRemove)) + - theme_Publication_legend_right() -pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.pdf",sep=""),width = 12,height = 8) -X_Delta_Backgrd_above_Tolerance_L_vs_K -dev.off() -pgg <- ggplotly(X_Delta_Backgrd_above_Tolerance_L_vs_K) -#pgg -plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.html",sep="") -saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - -#frequency plot for all data vs. the delta_background -DeltaBackground_Frequency_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_density() + - ggtitle("Density plot for Delta Background by Conc All Data") + theme_Publication_legend_right() - -#bar plot for all data vs. the delta_background -DeltaBackground_Bar_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_bar() + - ggtitle("Bar plot for Delta Background by Conc All Data") + theme_Publication_legend_right() - -pdf(file = paste(outputpath_QC,"Frequency_Delta_Background.pdf",sep=""),width = 12,height = 8) -print(DeltaBackground_Frequency_Plot) -print(DeltaBackground_Bar_Plot) -dev.off() - - -#Need to identify missing data, and differentiate between this data and removed data so the removed data can get set to NA and the missing data can get set to max theoretical values -#1 for missing data, 0 for non missing data -#Use "NG" for NoGrowth rather than "missing" -X$NG <- 0 -try(X[X$l == 0 & !is.na(X$l),]$NG <- 1) - -#1 for removed data, 0 non removed data -#Use DB to identify number of genes removed due to the DeltaBackground Threshold rather than "Removed" -X$DB <- 0 -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$DB <- 1) - -#replace the CPPs for l, r, AUC and K (must be last!) for removed data -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$l <- NA) -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$r <- NA) -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$AUC <- NA) -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$K <- NA) - - -Plate_Analysis_L_afterQC <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_afterQC <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_afterQC <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_afterQC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_afterQC <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - - -Plate_Analysis_L_Box_afterQC <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_Box_afterQC <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_Box_afterQC <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_Box_afterQC <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_Box_afterQC <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis.pdf",sep=""),width = 14,height=9) -Plate_Analysis_L -Plate_Analysis_L_afterQC -Plate_Analysis_K -Plate_Analysis_K_afterQC -Plate_Analysis_r -Plate_Analysis_r_afterQC -Plate_Analysis_AUC -Plate_Analysis_AUC_afterQC -Plate_Analysis_Delta_Backgrd -Plate_Analysis_Delta_Backgrd_afterQC -dev.off() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis_Boxplots.pdf",sep=""),width = 18,height=9) -Plate_Analysis_L_Box -Plate_Analysis_L_Box_afterQC -Plate_Analysis_K_Box -Plate_Analysis_K_Box_afterQC -Plate_Analysis_r_Box -Plate_Analysis_r_Box_afterQC -Plate_Analysis_AUC_Box -Plate_Analysis_AUC_Box_afterQC -Plate_Analysis_Delta_Backgrd_Box -Plate_Analysis_Delta_Backgrd_Box_afterQC -dev.off() - -#remove the zero values and print plate analysis -X_noZero <- X[which(X$l > 0),] -Plate_Analysis_L_afterQC_Z <- ggplot(X_noZero,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_afterQC_Z <- ggplot(X_noZero,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_afterQC_Z <- ggplot(X_noZero,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_afterQC_Z <- ggplot(X_noZero,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_afterQC_Z <- ggplot(X_noZero,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - - -Plate_Analysis_L_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros.pdf",sep=""),width = 14,height=9) -Plate_Analysis_L_afterQC_Z -Plate_Analysis_K_afterQC_Z -Plate_Analysis_r_afterQC_Z -Plate_Analysis_AUC_afterQC_Z -Plate_Analysis_Delta_Backgrd_afterQC_Z -dev.off() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros_Boxplots.pdf",sep=""),width = 18,height=9) -Plate_Analysis_L_Box_afterQC_Z -Plate_Analysis_K_Box_afterQC_Z -Plate_Analysis_r_Box_afterQC_Z -Plate_Analysis_AUC_Box_afterQC_Z -Plate_Analysis_Delta_Backgrd_Box_afterQC_Z -dev.off() - -#remove dataset with zeros removed -rm(X_noZero) - - -#X_test_missing_and_removed <- X[X$Removed == 1,] - -#calculate summary statistics for all strains, including both background and the deletions -X_stats_ALL <- ddply(X, c("Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_AUC = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - se_AUC = sd_AUC / sqrt(N-1) -) -#print(X_stats_ALL_L) - -write.csv(X_stats_ALL,file=paste(outputpath,"SummaryStats_ALLSTRAINS.csv"),row.names = FALSE) -#+++++END QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -##### Part 3 - Generate summary statistics and calculate the max theoretical L value -##### Calculate the Z score at each drug conc for each deletion strain - - -#get the background strains - can be modified to take another argument but for most screens will just be YDL227C -Background_Strains <- c("YDL227C") - -#first part of loop will go through for each background strain -#most cases there will only be one YDL227C -for(s in Background_Strains){ - X_Background <- X[X$OrfRep == s,] - - #if there's missing data for the background strains set these values to NA so the 0 values aren't included in summary statistics - #we may want to consider in some cases giving the max high value to L depending on the data type - if(table(X_Background$l)[1] == 0){ - X_Background[X_Background$l == 0,]$l <- NA - X_Background[X_Background$K == 0,]$K <- NA - X_Background[X_Background$r == 0,]$r <- NA - X_Background[X_Background$AUC == 0,]$AUC <- NA - } - - X_Background <- X_Background[!is.na(X_Background$l),] - - #get summary stats for L, K, R, AUC - X_stats_BY_L <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean = mean(l,na.rm=TRUE), - median = median(l,na.rm=TRUE), - max = max(l,na.rm=TRUE), - min = min(l,na.rm=TRUE), - sd = sd(l,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - print(X_stats_BY_L) - X1_SD <- max(X_stats_BY_L$sd) - - X_stats_BY_K <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(K)), - mean = mean(K,na.rm=TRUE), - median = median(K,na.rm=TRUE), - max = max(K,na.rm=TRUE), - min = min(K,na.rm=TRUE), - sd = sd(K,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - - X1_SD_K <- max(X_stats_BY_K$sd) - - - X_stats_BY_r <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = length(r), - mean = mean(r,na.rm=TRUE), - median = median(r,na.rm=TRUE), - max = max(r,na.rm=TRUE), - min = min(r,na.rm=TRUE), - sd = sd(r,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - - X1_SD_r <- max(X_stats_BY_r$sd) - - X_stats_BY_AUC <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = length(AUC), - mean = mean(AUC,na.rm=TRUE), - median = median(AUC,na.rm=TRUE), - max = max(AUC,na.rm=TRUE), - min = min(AUC,na.rm=TRUE), - sd = sd(AUC,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - - X1_SD_AUC <- max(X_stats_BY_AUC$sd) - - X_stats_BY <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_L = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - se_AUC = sd_AUC / sqrt(N-1) - ) - - write.csv(X_stats_BY,file=paste(outputpath,"SummaryStats_BackgroundStrains.csv"),row.names=FALSE) - - #calculate the max theoretical L values - #only look for max values when K is within 2SD of the ref strain - for(q in unique(X$Conc_Num_Factor)){ - if(q == 0){ - X_within_2SD_K <- X[X$Conc_Num_Factor == q,] - X_within_2SD_K <- X_within_2SD_K[!is.na(X_within_2SD_K$l),] - X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) ,] - X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - - X_outside_2SD_K <- X[X$Conc_Num_Factor == q,] - X_outside_2SD_K <- X_outside_2SD_K[!is.na(X_outside_2SD_K$l),] - #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - #X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - - } - if(q > 0){ - X_within_2SD_K_temp <- X[X$Conc_Num_Factor == q,] - X_within_2SD_K_temp <- X_within_2SD_K_temp[!is.na(X_within_2SD_K_temp$l),] - X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])),] - X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])),] - X_within_2SD_K <- rbind(X_within_2SD_K,X_within_2SD_K_temp) - - X_outside_2SD_K_temp <- X[X$Conc_Num_Factor == q,] - X_outside_2SD_K_temp <- X_outside_2SD_K_temp[!is.na(X_outside_2SD_K_temp$l),] - #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - #X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - X_outside_2SD_K <- rbind(X_outside_2SD_K,X_outside_2SD_K_temp) - } - } - - X_stats_BY_L_within_2SD_K <- ddply(X_within_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean = mean(l), - median = median(l), - max = max(l,na.rm=TRUE), - min = min(l,na.rm=TRUE), - sd = sd(l), - se = sd / sqrt(N-1), - z_max = (max-mean)/sd - ) - print(X_stats_BY_L_within_2SD_K) - X1_SD_within_2SD_K <- max(X_stats_BY_L_within_2SD_K$sd) - write.csv(X_stats_BY_L_within_2SD_K,file=paste(outputpath_QC,"Max_Observed_L_Vals_for_spots_within_2SD_K.csv",sep=""),row.names=FALSE) - - X_stats_BY_L_outside_2SD_K <- ddply(X_outside_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean = mean(l), - median = median(l), - max = max(l,na.rm=TRUE), - min = min(l,na.rm=TRUE), - sd = sd(l), - se = sd / sqrt(N-1) - ) - print(X_stats_BY_L_outside_2SD_K) - X1_SD_outside_2SD_K <- max(X_stats_BY_L_outside_2SD_K$sd) - - #X1_SD_outside_2SD_K <- X[X$l %in% X1_SD_within_2SD_K$l,] - Outside_2SD_K_L_vs_K <- ggplot(X_outside_2SD_K,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + - ggtitle("Raw L vs K for strains falling outside 2SD of the K mean at each conc") + theme_Publication_legend_right() - pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8) - print(Outside_2SD_K_L_vs_K) - dev.off() - pgg <- ggplotly(Outside_2SD_K_L_vs_K) - plotly_path <- paste(getwd(),"/",outputpath_QC,"RawL_vs_K_for_strains_outside_2SD_K.html",sep="") - saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - - Outside_2SD_K_delta_background_vs_K <- ggplot(X_outside_2SD_K,aes(Delta_Backgrd,K,color=as.factor(Conc_Num))) + geom_point(aes(l=l,ORF=ORF,Gene=Gene),shape=3,position="jitter") + - ggtitle("DeltaBackground vs K for strains falling outside 2SD of the K mean at each conc") + theme_Publication_legend_right() - pdf(paste(outputpath_QC,"DeltaBackground_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8) - Outside_2SD_K_delta_background_vs_K - dev.off() - pgg <- ggplotly(Outside_2SD_K_delta_background_vs_K) - #pgg - plotly_path <- paste(getwd(),"/",outputpath_QC,"DeltaBackground_vs_K_for_strains_outside_2SD_K.html",sep="") - saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - - #get the background strain mean values at the no drug conc to calculate shift - Background_L <- X_stats_BY_L$mean[1] - Background_K <- X_stats_BY_K$mean[1] - Background_r <- X_stats_BY_r$mean[1] - Background_AUC <- X_stats_BY_AUC$mean[1] - - #create empty plots for plotting element - p_l <- ggplot() - p_K <- ggplot() - p_r <- ggplot() - p_AUC <- ggplot() - - p_rf_l <- ggplot() - p_rf_K <- ggplot() - p_rf_r <- ggplot() - p_rf_AUC <- ggplot() - - #get only the deletion strains - X2 <- X - X2 <- X2[X2$OrfRep != "YDL227C",] - - #if set to max theoretical value, add a 1 to SM, if not, leave as 0 - #SM = Set to Max - X2$SM <- 0 - #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min - for(i in 1:length(unique(X2$Conc_Num))){ - Concentration <- unique(X2$Conc_Num)[i] - X2_temp <- X2[X2$Conc_Num == Concentration,] - if(Concentration == 0){ - X2_new <- X2_temp - print(paste("Check loop order, conc =",Concentration,sep=" ")) - } - if(Concentration > 0){ - try(X2_temp[X2_temp$l == 0 & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$SM <- 1) - try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - - #X2_temp[X2_temp$K == 0,]$K <- X_stats_ALL_K$max[i] - #X2_temp[X2_temp$r == 0,]$r <- X_stats_ALL_r$max[i] - #X2_temp[X2_temp$AUC == 0,]$AUC <- X_stats_ALL_AUC$max[i] - print(paste("Check loop order, conc =",Concentration,sep=" ")) - - X2_new <- rbind(X2_new,X2_temp) - - } - } - X2 <- X2_new - - - #get only the RF strains - X2_RF <- X - X2_RF <- X2_RF[X2_RF$OrfRep == "YDL227C",] - #if set to max theoretical value, add a 1 to SM, if not, leave as 0 - #SM = Set to Max - X2_RF$SM <- 0 - #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min - for(i in 1:length(unique(X2_RF$Conc_Num))){ - Concentration <- unique(X2_RF$Conc_Num)[i] - X2_RF_temp <- X2_RF[X2_RF$Conc_Num == Concentration,] - if(Concentration == 0){ - X2_RF_new <- X2_RF_temp - print(paste("Check loop order, conc =",Concentration,sep=" ")) - } - if(Concentration > 0){ - try(X2_RF_temp[X2_RF_temp$l == 0 & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$SM <- 1) - try(X2_RF_temp[X2_RF_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - print(paste("Check loop order, if error, refs have no L values outside theoretical max L, for REFs, conc =",Concentration,sep=" ")) - - X2_RF_new <- rbind(X2_RF_new,X2_RF_temp) - - } - } - X2_RF <- X2_RF_new - - - #######Part 4 Get the RF Z score values - #Change the OrfRep Column to include the RF strain, the Gene name and the Num. so each RF gets its own score - X2_RF$OrfRep <- paste(X2_RF$OrfRep,X2_RF$Gene,X2_RF$Num.,sep="_") - - num_genes_RF <- length(unique(X2_RF$OrfRep)) - print(num_genes_RF) - - - #create the output data.frame containing columns for each RF strain - InteractionScores_RF <- unique(X2_RF["OrfRep"]) - #InteractionScores_RF$Gene <- unique(X2$Gene) - InteractionScores_RF$Gene <- NA - InteractionScores_RF$Raw_Shift_L <- NA - InteractionScores_RF$Z_Shift_L <- NA - InteractionScores_RF$lm_Score_L <- NA - InteractionScores_RF$Z_lm_L <- NA - InteractionScores_RF$R_Squared_L <- NA - InteractionScores_RF$Sum_Z_Score_L <- NA - InteractionScores_RF$Avg_Zscore_L <- NA - InteractionScores_RF$Raw_Shift_K <- NA - InteractionScores_RF$Z_Shift_K <- NA - InteractionScores_RF$lm_Score_K <- NA - InteractionScores_RF$Z_lm_K <- NA - InteractionScores_RF$R_Squared_K <- NA - InteractionScores_RF$Sum_Z_Score_K <- NA - InteractionScores_RF$Avg_Zscore_K <- NA - InteractionScores_RF$Raw_Shift_r <- NA - InteractionScores_RF$Z_Shift_r <- NA - InteractionScores_RF$lm_Score_r <- NA - InteractionScores_RF$Z_lm_r <- NA - InteractionScores_RF$R_Squared_r <- NA - InteractionScores_RF$Sum_Z_Score_r <- NA - InteractionScores_RF$Avg_Zscore_r <- NA - InteractionScores_RF$Raw_Shift_AUC <- NA - InteractionScores_RF$Z_Shift_AUC <- NA - InteractionScores_RF$lm_Score_AUC <- NA - InteractionScores_RF$Z_lm_AUC <- NA - InteractionScores_RF$R_Squared_AUC <- NA - InteractionScores_RF$Sum_Z_Score_AUC <- NA - InteractionScores_RF$Avg_Zscore_AUC <- NA - InteractionScores_RF$NG <- NA - InteractionScores_RF$SM <- NA - - - for(i in 1:num_genes_RF){ - #get each deletion strain ORF - Gene_Sel <- unique(X2_RF$OrfRep)[i] - #extract only the current deletion strain and its data - X_Gene_Sel <- X2_RF[X2_RF$OrfRep == Gene_Sel,] - - X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm = TRUE), - median_L = median(l,na.rm = TRUE), - sd_L = sd(l,na.rm = TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm = TRUE), - median_K = median(K,na.rm = TRUE), - sd_K = sd(K,na.rm = TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm = TRUE), - median_r = median(r,na.rm = TRUE), - sd_r = sd(r,na.rm = TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm = TRUE), - median_AUC = median(AUC,na.rm = TRUE), - sd_AUC = sd(AUC,na.rm = TRUE), - se_AUC = sd_AUC / sqrt(N-1), - NG = sum(NG,na.rm=TRUE), - DB= sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - #Get shift vals - #if L is 0, that means the no growth on no drug - #if L is NA at 0, that means the spot was removed due to contamination - #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift - #otherwise calculate shift at no drug conc - if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){ - X_stats_interaction$Raw_Shift_L <- 0 - X_stats_interaction$Raw_Shift_K <- 0 - X_stats_interaction$Raw_Shift_r <- 0 - X_stats_interaction$Raw_Shift_AUC <- 0 - X_stats_interaction$Z_Shift_L <- 0 - X_stats_interaction$Z_Shift_K <- 0 - X_stats_interaction$Z_Shift_r <- 0 - X_stats_interaction$Z_Shift_AUC <- 0 - }else{ - X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L - X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K - X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r - X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC - X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1] - X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1] - X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1] - X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1] - } - - - #get WT vals - X_stats_interaction$WT_l <- X_stats_BY_L$mean - X_stats_interaction$WT_K <- X_stats_BY_K$mean - X_stats_interaction$WT_r <- X_stats_BY_r$mean - X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean - - #Get WT SD - X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd - X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd - X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd - X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd - - - #only get scores if there's growth at no drug - if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for no growth values in Z score calculation - if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #get linear model - gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction) - - #get interaction score calculated by linear model and R-squared value for the fit - gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] - r_squared_l <- summary(gene_lm_L)$r.squared - gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] - r_squared_K <- summary(gene_lm_K)$r.squared - gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] - r_squared_r <- summary(gene_lm_r)$r.squared - gene_interaction_AUC <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_AUC$coefficients[1] - r_squared_AUC <- summary(gene_lm_AUC)$r.squared - - #Get total of non removed values - Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1 - - #report the scores - InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] - InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] - - InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L - InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc) - - - InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K - InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc) - - InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r - InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1) - - InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC - InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1) - } - - if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for missing values in Z score calculatiom - if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #NA values for the next part since there's an NA or 0 at the no drug. - gene_lm_L <- NA - gene_lm_K <- NA - gene_lm_r <- NA - gene_lm_AUC <- NA - - gene_interaction_L <- NA - r_squared_l <- NA - gene_interaction_K <- NA - r_squared_K <- NA - gene_interaction_r <- NA - r_squared_r <- NA - gene_interaction_AUC <- NA - r_squared_AUC <- NA - - X_stats_interaction$Raw_Shift_L <- NA - X_stats_interaction$Raw_Shift_K <- NA - X_stats_interaction$Raw_Shift_r <- NA - X_stats_interaction$Raw_Shift_AUC <- NA - - X_stats_interaction$Z_Shift_L <- NA - X_stats_interaction$Z_Shift_K <- NA - X_stats_interaction$Z_Shift_r <- NA - X_stats_interaction$Z_Shift_AUC <- NA - - InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] - InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] - - InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L - InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - - InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K - InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - - InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r - InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - - InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC - InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - } - - if(i == 1){ - X_stats_interaction_ALL_RF <- X_stats_interaction - } - if(i > 1){ - X_stats_interaction_ALL_RF <- rbind(X_stats_interaction_ALL_RF,X_stats_interaction) - } - - InteractionScores_RF$NG[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE) - InteractionScores_RF$DB[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE) - InteractionScores_RF$SM[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE) - - #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int) - - - } - print("Pass RF Calculation loop") - - lm_sd_L <- sd(InteractionScores_RF$lm_Score_L,na.rm=TRUE) - lm_sd_K <- sd(InteractionScores_RF$lm_Score_K,na.rm=TRUE) - lm_sd_r <- sd(InteractionScores_RF$lm_Score_r,na.rm=TRUE) - lm_sd_AUC <- sd(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) - - lm_mean_L <- mean(InteractionScores_RF$lm_Score_L,na.rm=TRUE) - lm_mean_K <- mean(InteractionScores_RF$lm_Score_K,na.rm=TRUE) - lm_mean_r <- mean(InteractionScores_RF$lm_Score_r,na.rm=TRUE) - lm_mean_AUC <- mean(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) - - print(paste("Mean RF linear regression score L",lm_mean_L)) - - - InteractionScores_RF$Z_lm_L <- (InteractionScores_RF$lm_Score_L - lm_mean_L)/(lm_sd_L) - InteractionScores_RF$Z_lm_K <- (InteractionScores_RF$lm_Score_K - lm_mean_K)/(lm_sd_K) - InteractionScores_RF$Z_lm_r <- (InteractionScores_RF$lm_Score_r - lm_mean_r)/(lm_sd_r) - InteractionScores_RF$Z_lm_AUC <- (InteractionScores_RF$lm_Score_AUC - lm_mean_AUC)/(lm_sd_AUC) - - - InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$Z_lm_L,decreasing=TRUE),] - InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$NG,decreasing=TRUE),] - write.csv(InteractionScores_RF,paste(outputpath,"RF_ZScores_Interaction.csv",sep=""),row.names=FALSE) - - - for(i in 1:num_genes_RF){ - Gene_Sel <- unique(InteractionScores_RF$OrfRep)[i] - X_ZCalculations <- X_stats_interaction_ALL_RF[X_stats_interaction_ALL_RF$OrfRep == Gene_Sel,] - X_Int_Scores <- InteractionScores_RF[InteractionScores_RF$OrfRep == Gene_Sel,] - - p_rf_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg Zscore =",round(X_Int_Scores$Avg_Zscore_L,2))) + - annotate("text",x=1,y=25,label = paste("lm Zscore =",round(X_Int_Scores$Z_lm_L,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_rf_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) + - annotate("text",x=1,y=25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_L,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_rf_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-0.65,0.65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) + - annotate("text",x=1,y=0.25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_r,2))) + - annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) + - theme_Publication() - - p_rf_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-6500,6500)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) + - annotate("text",x=1,y=2500,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_AUC,2))) + - annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) + - theme_Publication() - - - - if(i == 1){ - X_stats_interaction_ALL_RF_final <- X_ZCalculations - } - if(i > 1){ - X_stats_interaction_ALL_RF_final <- rbind(X_stats_interaction_ALL_RF_final,X_ZCalculations) - } - } - print("Pass RF ggplot loop") - write.csv(X_stats_interaction_ALL_RF_final,paste(outputpath,"RF_ZScore_Calculations.csv",sep=""),row.names = FALSE) - - - ####### Part 5 - Get Zscores for Gene deletion strains - - - - #get total number of genes for the next loop - num_genes <- length(unique(X2$OrfRep)) - print(num_genes) - - #create the output data.frame containing columns for each deletion strain - InteractionScores <- unique(X2["OrfRep"]) - #InteractionScores$Gene <- unique(X2$Gene) - InteractionScores$Gene <- NA - InteractionScores$Raw_Shift_L <- NA - InteractionScores$Z_Shift_L <- NA - InteractionScores$lm_Score_L <- NA - InteractionScores$Z_lm_L <- NA - InteractionScores$R_Squared_L <- NA - InteractionScores$Sum_Z_Score_L <- NA - InteractionScores$Avg_Zscore_L <- NA - InteractionScores$Raw_Shift_K <- NA - InteractionScores$Z_Shift_K <- NA - InteractionScores$lm_Score_K <- NA - InteractionScores$Z_lm_K <- NA - InteractionScores$R_Squared_K <- NA - InteractionScores$Sum_Z_Score_K <- NA - InteractionScores$Avg_Zscore_K <- NA - InteractionScores$Raw_Shift_r <- NA - InteractionScores$Z_Shift_r <- NA - InteractionScores$lm_Score_r <- NA - InteractionScores$Z_lm_r <- NA - InteractionScores$R_Squared_r <- NA - InteractionScores$Sum_Z_Score_r <- NA - InteractionScores$Avg_Zscore_r <- NA - InteractionScores$Raw_Shift_AUC <- NA - InteractionScores$Z_Shift_AUC <- NA - InteractionScores$lm_Score_AUC <- NA - InteractionScores$Z_lm_AUC <- NA - InteractionScores$R_Squared_AUC <- NA - InteractionScores$Sum_Z_Score_AUC <- NA - InteractionScores$Avg_Zscore_AUC <- NA - InteractionScores$NG <- NA - InteractionScores$DB <- NA - InteractionScores$SM <- NA - - for(i in 1:num_genes){ - #get each deletion strain ORF - Gene_Sel <- unique(X2$OrfRep)[i] - #extract only the current deletion strain and its data - X_Gene_Sel <- X2[X2$OrfRep == Gene_Sel,] - - X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm = TRUE), - median_L = median(l,na.rm = TRUE), - sd_L = sd(l,na.rm = TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm = TRUE), - median_K = median(K,na.rm = TRUE), - sd_K = sd(K,na.rm = TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm = TRUE), - median_r = median(r,na.rm = TRUE), - sd_r = sd(r,na.rm = TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm = TRUE), - median_AUC = median(AUC,na.rm = TRUE), - sd_AUC = sd(AUC,na.rm = TRUE), - se_AUC = sd_AUC / sqrt(N-1), - NG = sum(NG,na.rm=TRUE), - DB= sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - #Get shift vals - #if L is 0, that means the no growth on no drug - #if L is NA at 0, that means the spot was removed due to contamination - #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift - #otherwise calculate shift at no drug conc - if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){ - X_stats_interaction$Raw_Shift_L <- 0 - X_stats_interaction$Raw_Shift_K <- 0 - X_stats_interaction$Raw_Shift_r <- 0 - X_stats_interaction$Raw_Shift_AUC <- 0 - X_stats_interaction$Z_Shift_L <- 0 - X_stats_interaction$Z_Shift_K <- 0 - X_stats_interaction$Z_Shift_r <- 0 - X_stats_interaction$Z_Shift_AUC <- 0 - }else{ - X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L - X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K - X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r - X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC - X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1] - X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1] - X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1] - X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1] - } - - - #get WT vals - X_stats_interaction$WT_l <- X_stats_BY_L$mean - X_stats_interaction$WT_K <- X_stats_BY_K$mean - X_stats_interaction$WT_r <- X_stats_BY_r$mean - X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean - - #Get WT SD - X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd - X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd - X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd - X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd - - - #only get scores if there's growth at no drug - if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for no growth values in Z score calculation - if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #get linear model - gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction) - - #get interaction score calculated by linear model and R-squared value for the fit - gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] - r_squared_l <- summary(gene_lm_L)$r.squared - gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] - r_squared_K <- summary(gene_lm_K)$r.squared - gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] - r_squared_r <- summary(gene_lm_r)$r.squared - gene_interaction_AUC <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_AUC$coefficients[1] - r_squared_AUC <- summary(gene_lm_AUC)$r.squared - - #Get total of non removed values - Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1 - - #report the scores - InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1]) - InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1]) - - InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_L - InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_L-lm_mean_L)/lm_sd_L - InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE) - InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc) - - - InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_K - InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_K-lm_mean_K)/lm_sd_K - InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE) - InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc) - - InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_r - InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_r-lm_mean_r)/lm_sd_r - InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE) - InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1) - - InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_AUC - InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_AUC-lm_mean_AUC)/lm_sd_AUC - InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE) - InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1) - } - - if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for missing values in Z score calculatiom - if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #NA values for the next part since there's an NA or 0 at the no drug. - gene_lm_L <- NA - gene_lm_K <- NA - gene_lm_r <- NA - - gene_interaction_L <- NA - r_squared_l <- NA - gene_interaction_K <- NA - r_squared_K <- NA - gene_interaction_r <- NA - r_squared_r <- NA - - X_stats_interaction$Raw_Shift_L <- NA - X_stats_interaction$Raw_Shift_K <- NA - X_stats_interaction$Raw_Shift_r <- NA - X_stats_interaction$Raw_Shift_AUC <- NA - - X_stats_interaction$Z_Shift_L <- NA - X_stats_interaction$Z_Shift_K <- NA - X_stats_interaction$Z_Shift_r <- NA - X_stats_interaction$Z_Shift_AUC <- NA - - InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1]) - InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1]) - - InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- NA - - InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <-NA - InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- NA - - InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- NA - - InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - } - - if(i == 1){ - X_stats_interaction_ALL <- X_stats_interaction - } - if(i > 1){ - X_stats_interaction_ALL <- rbind(X_stats_interaction_ALL,X_stats_interaction) - } - - InteractionScores$NG[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE) - InteractionScores$DB[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE) - InteractionScores$SM[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE) - - #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int) - - - } - print("Pass Int Calculation loop") - InteractionScores <- InteractionScores[order(InteractionScores$Z_lm_L,decreasing=TRUE),] - InteractionScores <- InteractionScores[order(InteractionScores$NG,decreasing=TRUE),] - df_order_by_OrfRep <- unique(InteractionScores$OrfRep) - #X_stats_interaction_ALL <- X_stats_interaction_ALL[order(X_stats_interaction_ALL$NG,decreasing=TRUE),] - write.csv(InteractionScores,paste(outputpath,"ZScores_Interaction.csv",sep=""),row.names=FALSE) - - InteractionScores_deletion_enhancers_L <- InteractionScores[InteractionScores$Avg_Zscore_L >= 2,] - InteractionScores_deletion_enhancers_K <- InteractionScores[InteractionScores$Avg_Zscore_K <= -2,] - InteractionScores_deletion_suppressors_L <- InteractionScores[InteractionScores$Avg_Zscore_L <= -2,] - InteractionScores_deletion_suppressors_K <- InteractionScores[InteractionScores$Avg_Zscore_K >= 2,] - InteractionScores_deletion_enhancers_and_Suppressors_L <- InteractionScores[InteractionScores$Avg_Zscore_L >= 2 | InteractionScores$Avg_Zscore_L <= -2,] - InteractionScores_deletion_enhancers_and_Suppressors_K <- InteractionScores[InteractionScores$Avg_Zscore_K >= 2 | InteractionScores$Avg_Zscore_K <= -2,] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- InteractionScores[InteractionScores$Z_lm_L >= 2 & InteractionScores$Avg_Zscore_L <= -2,] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- InteractionScores[InteractionScores$Z_lm_L <= -2 & InteractionScores$Avg_Zscore_L >= 2,] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- InteractionScores[InteractionScores$Z_lm_K <= -2 & InteractionScores$Avg_Zscore_K >= 2,] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- InteractionScores[InteractionScores$Z_lm_K >= 2 & InteractionScores$Avg_Zscore_K <= -2,] - - InteractionScores_deletion_enhancers_L <- InteractionScores_deletion_enhancers_L[!is.na(InteractionScores_deletion_enhancers_L$OrfRep),] - InteractionScores_deletion_enhancers_K <- InteractionScores_deletion_enhancers_K[!is.na(InteractionScores_deletion_enhancers_K$OrfRep),] - InteractionScores_deletion_suppressors_L <- InteractionScores_deletion_suppressors_L[!is.na(InteractionScores_deletion_suppressors_L$OrfRep),] - InteractionScores_deletion_suppressors_K <- InteractionScores_deletion_suppressors_K[!is.na(InteractionScores_deletion_suppressors_K$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_L <- InteractionScores_deletion_enhancers_and_Suppressors_L[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_K <- InteractionScores_deletion_enhancers_and_Suppressors_K[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K$OrfRep),] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L[!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L$OrfRep),] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L[!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L$OrfRep),] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K[!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K$OrfRep),] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K[!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K$OrfRep),] - - write.csv(InteractionScores_deletion_enhancers_L,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_K,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_L,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_K,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L,paste(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L,paste(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K,paste(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K,paste(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv",sep=""),row.names=FALSE) - - #get enhancers and suppressors for linear regression - InteractionScores_deletion_enhancers_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2,] - InteractionScores_deletion_enhancers_K_lm <- InteractionScores[InteractionScores$Z_lm_K <= -2,] - InteractionScores_deletion_suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L <= -2,] - InteractionScores_deletion_suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2,] - InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2 | InteractionScores$Z_lm_L <= -2,] - InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2 | InteractionScores$Z_lm_K <= -2,] - - InteractionScores_deletion_enhancers_L_lm <- InteractionScores_deletion_enhancers_L_lm[!is.na(InteractionScores_deletion_enhancers_L_lm$OrfRep),] - InteractionScores_deletion_enhancers_K_lm <- InteractionScores_deletion_enhancers_K_lm[!is.na(InteractionScores_deletion_enhancers_K_lm$OrfRep),] - InteractionScores_deletion_suppressors_L_lm <- InteractionScores_deletion_suppressors_L_lm[!is.na(InteractionScores_deletion_suppressors_L_lm$OrfRep),] - InteractionScores_deletion_suppressors_K_lm <- InteractionScores_deletion_suppressors_K_lm[!is.na(InteractionScores_deletion_suppressors_K_lm$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores_deletion_enhancers_and_Suppressors_L_lm[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L_lm$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores_deletion_enhancers_and_Suppressors_K_lm[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K_lm$OrfRep),] - - write.csv(InteractionScores_deletion_enhancers_L_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_L_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_K_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_K_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_L_lm,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_L_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_K_lm,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_K_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv",sep=""),row.names=FALSE) - - - write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) - print('ln 1570 write StudyInfo.csv after ') - #write.table(Labels,file=paste("../Code/StudyInfo.txt"),sep="\t",row.names = FALSE) - - for(i in 1:num_genes){ - Gene_Sel <- unique(InteractionScores$OrfRep)[i] - X_ZCalculations <- X_stats_interaction_ALL[X_stats_interaction_ALL$OrfRep == Gene_Sel,] - X_Int_Scores <- InteractionScores[InteractionScores$OrfRep == Gene_Sel,] - - p_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_L,2))) + - annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_L,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) + - annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_K,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-0.65,0.65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) + - annotate("text",x=1,y=0.25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_r,2))) + - annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) + - theme_Publication() - - p_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-6500,6500)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) + - annotate("text",x=1,y=2500,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_AUC,2))) + - annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) + - theme_Publication() - - - - if(i == 1){ - X_stats_interaction_ALL_final <- X_ZCalculations - } - if(i > 1){ - X_stats_interaction_ALL_final <- rbind(X_stats_interaction_ALL_final,X_ZCalculations) - } - } - print("Pass Int ggplot loop") - write.csv(X_stats_interaction_ALL_final,paste(outputpath,"ZScore_Calculations.csv",sep=""),row.names = FALSE) - - - - - - - Blank <- ggplot(X2_RF) + geom_blank() - - pdf(paste(outputpath,"InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE) - - X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise, - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_AUC = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - NG = sum(NG,na.rm=TRUE), - DB = sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - L_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,l)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + - annotate("text",x=-0.25,y=10,label="NG") + - annotate("text",x=-0.25,y=5,label="DB") + - annotate("text",x=-0.25,y=0,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=0,label=X_stats_X2_RF$SM) + - theme_Publication() - - K_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,K)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(-20,160)) + - annotate("text",x=-0.25,y=-5,label="NG") + - annotate("text",x=-0.25,y=-12.5,label="DB") + - annotate("text",x=-0.25,y=-20,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-5,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-12.5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-20,label=X_stats_X2_RF$SM) + - theme_Publication() - - R_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,r)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - annotate("text",x=-0.25,y=.9,label="NG") + - annotate("text",x=-0.25,y=.8,label="DB") + - annotate("text",x=-0.25,y=.7,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.9,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.8,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.7,label=X_stats_X2_RF$SM) + - theme_Publication() - - AUC_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,AUC)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + - annotate("text",x=-0.25,y=11000,label="NG") + - annotate("text",x=-0.25,y=10000,label="DB") + - annotate("text",x=-0.25,y=9000,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=11000,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10000,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=9000,label=X_stats_X2_RF$SM) + - theme_Publication() - - - L_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + - theme_Publication() - - K_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + - theme_Publication() - - r_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - theme_Publication() - - AUC_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + - theme_Publication() - - - grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2) - grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2) - - - - #plot the references - #grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4) - #grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2) - - #loop for grid arrange 4x3 - j <- rep(1,((num_genes)/3)-1) - for(n in 1:length(j)){ - j[n+1] <- n*3 + 1 - } - #loop for printing each plot - num <- 0 - for(m in 1:(round((num_genes)/3)-1)){ - num <- j[m] - grid.arrange(p_l[[num]],p_K[[num]],p_r[[num]],p_AUC[[num]],p_l[[num+1]],p_K[[num+1]],p_r[[num+1]],p_AUC[[num+1]],p_l[[num+2]],p_K[[num+2]],p_r[[num+2]],p_AUC[[num+2]],ncol=4,nrow=3) - #grid.arrange(p_l[[364]],p_K[[364]],p_r[[364]],p_l[[365]],p_K[[365]],p_r[[365]],p_l[[366]],p_K[[366]],p_r[[366]],ncol=3,nrow=3) - - - #p1[[num+3]],p_K[[num+3]],p_r[[num+3]],p1[[num+4]],p_K[[num+4]],p_r[[num+4]] - } - if(num_genes != (num+2)){ - total_num = num_genes - (num+2) - if(total_num == 5){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_AUC[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],p_AUC[[num+7]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - if(total_num == 4){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_AUC[[num+6]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - - if(total_num == 3){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3) - #grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 2){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 1){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - } - dev.off() - - - - pdf(paste(outputpath,"RF_InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE) - - X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise, - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_AUC = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - NG = sum(NG,na.rm=TRUE), - DB = sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - L_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,l)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + - annotate("text",x=-0.25,y=10,label="NG") + - annotate("text",x=-0.25,y=5,label="DB") + - annotate("text",x=-0.25,y=0,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=0,label=X_stats_X2_RF$SM) + - theme_Publication() - - K_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,K)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(-20,160)) + - annotate("text",x=-0.25,y=-5,label="NG") + - annotate("text",x=-0.25,y=-12.5,label="DB") + - annotate("text",x=-0.25,y=-20,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-5,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-12.5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-20,label=X_stats_X2_RF$SM) + - theme_Publication() - - R_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,r)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - annotate("text",x=-0.25,y=.9,label="NG") + - annotate("text",x=-0.25,y=.8,label="DB") + - annotate("text",x=-0.25,y=.7,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.9,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.8,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.7,label=X_stats_X2_RF$SM) + - theme_Publication() - - AUC_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,AUC)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + - annotate("text",x=-0.25,y=11000,label="NG") + - annotate("text",x=-0.25,y=10000,label="DB") + - annotate("text",x=-0.25,y=9000,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=11000,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10000,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=9000,label=X_stats_X2_RF$SM) + - theme_Publication() - - - L_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + - theme_Publication() - - K_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + - theme_Publication() - - r_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - theme_Publication() - - AUC_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(12000,0)) + - theme_Publication() - - - grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2) - grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2) - - - - #plot the references - #grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4) - #grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2) - - #loop for grid arrange 4x3 - j <- rep(1,((num_genes_RF)/3)-1) - for(n in 1:length(j)){ - j[n+1] <- n*3 + 1 - } - #loop for printing each plot - num <- 0 - for(m in 1:(round((num_genes_RF)/3)-1)){ - num <- j[m] - grid.arrange(p_rf_l[[num]],p_rf_K[[num]],p_rf_r[[num]],p_rf_AUC[[num]],p_rf_l[[num+1]],p_rf_K[[num+1]],p_rf_r[[num+1]],p_rf_AUC[[num+1]],p_rf_l[[num+2]],p_rf_K[[num+2]],p_rf_r[[num+2]],p_rf_AUC[[num+2]],ncol=4,nrow=3) - #grid.arrange(p_rf_l[[364]],p_rf_K[[364]],p_rf_r[[364]],p_rf_l[[365]],p_rf_K[[365]],p_rf_r[[365]],p_rf_l[[366]],p_rf_K[[366]],p_rf_r[[366]],ncol=3,nrow=3) - - - #p1[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p1[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]] - } - if(num_genes_RF != (num+2)){ - total_num = num_genes_RF - (num+2) - if(total_num == 5){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_AUC[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],p_rf_AUC[[num+7]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - if(total_num == 4){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_AUC[[num+6]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - - if(total_num == 3){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3) - #grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 2){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 1){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - } - dev.off() - - #print rank plots for L and K gene interactions - - - InteractionScores_AdjustMissing <- InteractionScores - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_L),]$Avg_Zscore_L <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_K),]$Avg_Zscore_K <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_r),]$Avg_Zscore_r <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_AUC),]$Avg_Zscore_AUC <- 0.001 - - InteractionScores_AdjustMissing$L_Rank <- NA - InteractionScores_AdjustMissing$K_Rank <- NA - InteractionScores_AdjustMissing$r_Rank <- NA - InteractionScores_AdjustMissing$AUC_Rank <- NA - - InteractionScores_AdjustMissing$L_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_L) - InteractionScores_AdjustMissing$K_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_K) - InteractionScores_AdjustMissing$r_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_r) - InteractionScores_AdjustMissing$AUC_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_AUC) - - # - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_L),]$Z_lm_L <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_K),]$Z_lm_K <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_r),]$Z_lm_r <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_AUC),]$Z_lm_AUC <- 0.001 - - InteractionScores_AdjustMissing$L_Rank_lm <- NA - InteractionScores_AdjustMissing$K_Rank_lm <- NA - InteractionScores_AdjustMissing$r_Rank_lm <- NA - InteractionScores_AdjustMissing$AUC_Rank_lm <- NA - - InteractionScores_AdjustMissing$L_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_L) - InteractionScores_AdjustMissing$K_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_K) - InteractionScores_AdjustMissing$r_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_r) - InteractionScores_AdjustMissing$AUC_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_AUC) - - - - Rank_L_1SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - pdf(paste(outputpath,"RankPlots.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2) - - dev.off() - - - Rank_L_1SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - pdf(paste(outputpath,"RankPlots_lm.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2) - - dev.off() - - - - X_NArm <- InteractionScores[!is.na(InteractionScores$Z_lm_L) | !is.na(InteractionScores$Avg_Zscore_L) ,] - - #find overlaps - X_NArm$Overlap <- "No Effect" - try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Enhancer Both") - try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Suppressor Both") - try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= 2,]$Overlap <- "Deletion Enhancer lm only") - try(X_NArm[X_NArm$Z_lm_L <= 2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Enhancer Avg Zscore only") - try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= -2,]$Overlap <- "Deletion Suppressor lm only") - try(X_NArm[X_NArm$Z_lm_L >= -2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Suppressor Avg Zscore only") - try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Enhancer lm, Deletion Suppressor Avg Z score") - try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Suppressor lm, Deletion Enhancer Avg Z score") - - #get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 - get_lm_L <- lm(X_NArm$Z_lm_L~X_NArm$Avg_Zscore_L) - L_lm <- summary(get_lm_L) - - get_lm_K <- lm(X_NArm$Z_lm_K~X_NArm$Avg_Zscore_K) - K_lm <- summary(get_lm_K) - - get_lm_r <- lm(X_NArm$Z_lm_r~X_NArm$Avg_Zscore_r) - r_lm <- summary(get_lm_r) - - get_lm_AUC <- lm(X_NArm$Z_lm_AUC~X_NArm$Avg_Zscore_AUC) - AUC_lm <- summary(get_lm_AUC) - - pdf(paste(outputpath,"Avg_Zscore_vs_lm_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) - - print(ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm L") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(Avg_Zscore_K,Z_lm_K)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm K") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(K_lm$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(Avg_Zscore_r,Z_lm_r)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm r") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(r_lm$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(Avg_Zscore_AUC,Z_lm_AUC)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm AUC") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(AUC_lm$r.squared,2))) + theme_Publication_legend_right()) - - dev.off() - - - lm_v_Zscore_L <- ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L,ORF=OrfRep,Gene=Gene,NG=NG,SM=SM,DB=DB)) + geom_point(aes(color=Overlap),shape=3) + - geom_smooth(method = "lm",color=1) + ggtitle("Avg Zscore vs lm L") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right() - - pgg <- ggplotly(lm_v_Zscore_L) - #pgg - plotly_path <- paste(getwd(),"/",outputpath,"Avg_Zscore_vs_lm_NA_rm.html",sep="") - saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - X_NArm$L_Rank <- rank(X_NArm$Avg_Zscore_L) - X_NArm$K_Rank <- rank(X_NArm$Avg_Zscore_K) - X_NArm$r_Rank <- rank(X_NArm$Avg_Zscore_r) - X_NArm$AUC_Rank <- rank(X_NArm$Avg_Zscore_AUC) - - X_NArm$L_Rank_lm <- rank(X_NArm$Z_lm_L) - X_NArm$K_Rank_lm <- rank(X_NArm$Z_lm_K) - X_NArm$r_Rank_lm <- rank(X_NArm$Z_lm_r) - X_NArm$AUC_Rank_lm <- rank(X_NArm$Z_lm_AUC) - - #get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 - get_lm_L2 <- lm(X_NArm$L_Rank_lm~X_NArm$L_Rank) - L_lm2 <- summary(get_lm_L2) - - get_lm_K2 <- lm(X_NArm$K_Rank_lm~X_NArm$K_Rank) - K_lm2 <- summary(get_lm_K2) - - get_lm_r2 <- lm(X_NArm$r_Rank_lm~X_NArm$r_Rank) - r_lm2 <- summary(get_lm_r2) - - get_lm_AUC2 <- lm(X_NArm$AUC_Rank_lm~X_NArm$AUC_Rank) - AUC_lm2 <- summary(get_lm_AUC2) - - num_genes_NArm2 <- (dim(X_NArm)[1])/2 - - pdf(paste(outputpath,"Avg_Zscore_vs_lm_ranked_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) - - print(ggplot(X_NArm,aes(L_Rank,L_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank Avg Zscore vs lm L") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(L_lm2$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(K_Rank,K_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank Avg Zscore vs lm K") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(K_lm2$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(r_Rank,r_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank Avg Zscore vs lm r") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(r_lm2$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(AUC_Rank,AUC_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank of Avg Zscore vs lm AUC") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(AUC_lm2$r.squared,2))) + theme_Publication_legend_right()) - - - - dev.off() - - - - Rank_L_1SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - pdf(paste(outputpath,"RankPlots_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2) - - dev.off() - - - Rank_L_1SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - pdf(paste(outputpath,"RankPlots_lm_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2) - - dev.off() - -} - - - -#get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 -get_lm_1 <- lm(X_NArm$Z_lm_K~X_NArm$Z_lm_L) -L_lm_1 <- summary(get_lm_1) - -get_lm_2 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_L) -L_lm_2 <- summary(get_lm_2) - -get_lm_3 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_L) -L_lm_3 <- summary(get_lm_3) - -get_lm_4 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_K) -L_lm_4 <- summary(get_lm_4) - -get_lm_5 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_K) -L_lm_5 <- summary(get_lm_5) - -get_lm_6 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_r) -L_lm_6 <- summary(get_lm_6) - - -pdf(file=paste(outputpath,"Correlation_CPPs.pdf",sep=""),width = 10, height = 7, onefile = TRUE) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_K)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction L vs. Interaction K") + - xlab("z-score L") + ylab("z-score K") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_1$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction L vs. Interaction r") + - xlab("z-score L") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_2$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction L vs. Interaction AUC") + - xlab("z-score L") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_3$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction K vs. Interaction r") + - xlab("z-score K") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_4$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction K vs. Interaction AUC") + - xlab("z-score K") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_5$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_r,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction r vs. Interaction AUC") + - xlab("z-score r") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_6$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -InteractionScores_RF2 <- InteractionScores_RF[!is.na(InteractionScores_RF$Z_lm_L),] -ggplot(X_NArm,aes(Z_lm_L,Z_lm_K)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_K),color="cyan") + - ggtitle("Interaction L vs. Interaction K") + - xlab("z-score L") + ylab("z-score K") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_1$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_r),color="cyan") + - ggtitle("Interaction L vs. Interaction r") + - xlab("z-score L") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_2$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_AUC),color="cyan") + - ggtitle("Interaction L vs. Interaction AUC") + - xlab("z-score L") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_3$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_K,Z_lm_r),color="cyan") + - ggtitle("Interaction K vs. Interaction r") + - xlab("z-score K") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_4$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_K,Z_lm_AUC),color="cyan") + - ggtitle("Interaction K vs. Interaction AUC") + - xlab("z-score K") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_5$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_r,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_r,Z_lm_AUC),color="cyan") + - ggtitle("Interaction r vs. Interaction AUC") + - xlab("z-score r") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_6$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - - - - - -dev.off() - - -#write.csv(Labels,file=paste("../Code/Parameters.csv"),row.names = FALSE) -timestamp() - -#BoneYard*********************************************** -#I'm thinking this parameter needs to be save somewhere "permanent' for the record so outputs can be reproduced. -#take this out of the Arguments. In Matlab I could for future in .mat file. Maybe I could save the SD Args[2] as part of the StudyInfo.txt. -#Corruptable but better than nothing. -#if(is.na(Args[2])){ -# std=3 -#}else { -# std= Arg[2] -#Delta_Background_sdFactor <- 2 #Args[3] -#DelBGFactr <- as.numeric(Delta_Background_sdFactor) -#} - diff --git a/workflow/templates/qhtcp/Exp4/.DS_Store b/workflow/templates/qhtcp/Exp4/.DS_Store deleted file mode 100644 index 407f212c..00000000 Binary files a/workflow/templates/qhtcp/Exp4/.DS_Store and /dev/null differ diff --git a/workflow/templates/qhtcp/Exp4/ExpFrontend.m b/workflow/templates/qhtcp/Exp4/ExpFrontend.m deleted file mode 100644 index 2bdc98c3..00000000 --- a/workflow/templates/qhtcp/Exp4/ExpFrontend.m +++ /dev/null @@ -1,53 +0,0 @@ -%FrontEnd utility to copy source result sheet into Exp_ folders of -%StudiesQHTCP/StudyName/Exp1(2,3,4). This allow the automation of path -%capture to the StudiesDataArchieve.txt study log. -%Select, copy and Capture Study Exp_ details to study log - -%Exp meta data collection -W=pwd; -%Load matlab stored data file into workspace -try -ExpLabel= strcat('Exp',W(end)) -questdlg('\fontsize{20} Select the !!Results File','File Selection','OK', struct('Default','OK','Interpreter','tex')); -[resFile,resPath]= uigetfile('*.txt') -copyfile((fullfile(resPath,resFile)),fullfile(W)) -resDate= char(regexp(resFile, '(\d\d\_\d\d\d\d)|( \d\d\_\d\d\d\d|\d\d\d\d\d\d)','match')) -cd .. -Wstudy= pwd -studyDate= datetime("today"); -S.sDate(1) = {studyDate}; -if ispc - lastSep=max(strfind(Wstudy,'\')) - studyName= Wstudy((lastSep+1):end) -else - lastSep=max(strfind(Wstudy,'/')) - studyName= Wstudy((lastSep+1):end) -end - -S.sDate(1)= {studyDate}; -S.sName(1)= {studyName} -S.sPath(1)= {Wstudy} -S.ELabel(1)= {ExpLabel} -S.EresDate(1)= {resDate} -S.EresFile(1)= {resFile} -S.EresPath(1)= {resPath} - -cd .. - -fid = fopen('StudiesDataArchive.txt','a'); -fprintf(fid,'StudyDate\tStudyName\tStudyPath\tExpNum\tExpDate\tExpPath\tResultFile\n'); -fprintf(fid, '%s\t %s\t %s\t %s\t %s\t %s\t %s \n',S.sDate{1},S.sName{1},S.sPath{1},S.ELabel{1},S.EresDate{1},S.EresPath{1},S.EresFile{1}); -fclose(fid); -%save((fullfile(pwd,'studyLog.mat')), 'S') -fclose('all'); - -catch - cd(W) - disp('Error: Unable to Execute ExpFrontend.m') -end - - - - - - diff --git a/workflow/templates/qhtcp/Exp4/NotesExp4 b/workflow/templates/qhtcp/Exp4/NotesExp4 deleted file mode 100644 index e69de29b..00000000 diff --git a/workflow/templates/qhtcp/Exp4/Z_InteractionTemplate.R b/workflow/templates/qhtcp/Exp4/Z_InteractionTemplate.R deleted file mode 100644 index fe31ca14..00000000 --- a/workflow/templates/qhtcp/Exp4/Z_InteractionTemplate.R +++ /dev/null @@ -1,2730 +0,0 @@ -#Based on InteractionTemplate.R which is based on Sean Santose's Interaction_V5 script. -#Adapt SS For Structured Data storage but using command line scripts -###Set up the required libraries, call required plot theme elements and set up the command line arguments -library("ggplot2") -library("plyr") -library("extrafont") -library("gridExtra") -library("gplots") -library("RColorBrewer") -library("stringr") -#library("gdata") -library(plotly) -library(htmlwidgets) - -Args <- commandArgs(TRUE) -input_file <- Args[1] #"!!Results_17_0827_yor1null-rpl12anull misLabeledAsFrom MI 17_0919_yor1-curated.txt" #Args[1] #Arg 1 #"!!ResultsStd_JS 19_1224_HLEG_P53.txt" is the !!results ... .txt -#Tool to find a file and copy it to desired location -destDir= getwd() -#srcFile= file.choose() -#file.copy(srcFile, destDir) -#input_file= tail(strsplit(srcFile,"[/]")[[1]],1) - - - -#Path to Output Directory -#W=getwd() #R is F'd up, Can't use, Any legitamate platform could build out dirs from this -outDir <- "ZScores/" #"Args[2] #paste0(W,"/ZScores/") -subDir <- outDir #Args[2] - -if (file.exists(subDir)){ - outputpath <- subDir -} else { - dir.create(file.path(subDir)) -} - -if (file.exists(paste(subDir,"QC/",sep=""))){ - outputpath_QC <- paste(subDir,"QC/",sep="") -} else { - dir.create(file.path(paste(subDir,"QC/",sep=""))) - outputpath_QC <- paste(subDir,"QC/",sep="") -} -#define the output path (formerly the second argument from Rscript) -outputpath <- outDir - -#Set Args[2] the Background contamination noise filter as a function of standard deviation -#std= as.numeric(Args[2]) -#Sean recommends 3 or 5 SD factor. -#Capture Exp_ number,use it to Save Args[2]{std}to Labels field and then Write to Labels to studyInfo.txt for future reference -Labels <- read.csv(file= "../Code/StudyInfo.csv",stringsAsFactors = FALSE) #,sep= ",") -print("Be sure to enter Background noise filter standard deviation i.e., 3 or 5 per Sean") - -#User prompt for std multiplier Value -cat("Enter a Standard Deviation value to noise filter \n") -inpChar<- readLines(file("stdin"), n = 1L) -cat(paste("Standard Deviation Value is", inpChar, "\n")) -inpNum= as.numeric(inpChar) -#set std deviation multiplier default if no user entry -if(!is.na(inpNum)){ - std= inpNum -}else{std= 3} - - -expNumber<- as.numeric(sub("^.*?(\\d+)$", "\\1", getwd())) -Labels[expNumber,3]= as.numeric(std) -Delta_Background_sdFactor <- std -DelBGFactr <- as.numeric(Delta_Background_sdFactor) -#Write Background SD value to studyInfo.txt file -#write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) -write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) -print('ln 50 write StudyInfo.csv ') -#write.table(Labels,file=paste(outputpath,"StudyInfo.txt"),sep = "\t",row.names = FALSE) - -#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -#++++++BEGIN USER DATA SELECTION SECTION+++++++++++++++++++++++++++++++++++++++++++++++++ - -#read in the data -X <- read.delim(input_file,skip=2,as.is=T,row.names=1,strip.white=TRUE) -X <- X[!(X[[1]]%in%c("","Scan")),] -#X <- X[!(X[[1]]%in%c(61:76)),] #Remove dAmp plates which are Scans 61 thru 76 - -#X <- X[which(X$Specifics == "WT"),] - -#X_length <- length(X[1,]) -#X_end <- length(X[1,]) - 2 -#X <- X[,c(1:42,X_end:X_length)] - - -#use numeric data to perform operations -X$Col <- as.numeric(X$Col) -X$Row <- as.numeric(X$Row) -X$l <- as.numeric(X$l) -X$K <- as.numeric(X$K) -X$r <- as.numeric(X$r) -X$Scan <- as.numeric(X$Scan) -X$AUC <- as.numeric(X$AUC) -X$LstBackgrd <- as.numeric(X$LstBackgrd) -X$X1stBackgrd <- as.numeric(X$X1stBackgrd) - -#Sometimes the an experimenter may have placed the non-varying drug in the 'Drug' col instead of the 'Modifier1' col -#as was the case in Gemcitabin and Cytarabin experiments. -#The following allows user to rename columns so as to get the appropriate -#data where it needs to be for the script to run properly. -#colnames(X)[7] <- "Modifier1" -#colnames(X)[8] <- "Conc1" -#colnames(X)[10] <- "Drug" -#colnames(X)[11] <- "Conc" - -#set the OrfRep to YDL227C for the ref data -X[X$ORF == "YDL227C",]$OrfRep <- "YDL227C" -#Sean removes the Doxycyclin at 0.0ug.mL so that only the Oligomycin series with Doxycyclin of 0.12ug/mL are used. -#That is the first DM plates are removed from the data set with the following. -#X <- X[X$Conc1 != "0ug/ml",] #This removes data with dox ==0 leaving gene expression on with four different concentrations of Gemcytabin -X <- X[X$Drug != "BMH21",] #This removes data concerning BMH21 for this experiment - -#Mert placed the"bad_spot" text in the ORF col. for particular spots in the RF1 and RF2 plates. -#This code removes those spots from the data set used for the interaction analysis. Dr.Hartman feels that these donot effect Zscores significantly and so "non-currated" files were used. -#try(X <- X[X$ORF != "bad_spot",]) -#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++= - -#get total number of drug concentrations -Total_Conc_Nums <- length(unique(X$Conc)) - -#function to ID numbers in string with characters+numbers (ie to get numeric drug conc) -numextract <- function(string){ - str_extract(string, "\\-*\\d+\\.*\\d*") -} - -#generate a new column with the numeric drug concs -X$Conc_Num <- as.numeric(numextract(X$Conc)) -#Generate new column with the numeric drug concs as factors starting at 0 for the graphing later -X$Conc_Num_Factor <- as.numeric(as.factor(X$Conc_Num)) - 1 - -#Get the max factor for concentration -MAX_CONC <- max(X$Conc_Num_Factor) -#if treating numbers not as factors uncomment next line and comment out previous line -#MAX_CONC <- max(X$Conc_Num) - -#remove wells with problems for making graphs and to not include in summary statistics -X <- X[X$Gene != "BLANK",] -X <- X[X$Gene != "Blank",] -X <- X[X$ORF != "Blank",] -X <- X[X$Gene != "blank",] -#X <- X[X$Gene != "HO",] -Xbu= X -#Inserted to use SGDgenelist to update orfs and replace empty geneName cells with ORF name (adapted from Sean's Merge script). This is to 'fix' the naming for everything that follows (REMc, Heatmaps ... et.al) rather than do it piece meal later -#Sean's Match Script( which was adapted here) was fixed 2022_0608 so as not to write over the RF1&RF2 geneNames which caused a variance with his code results -#in the Z_lm_L,K,r&AUC output values. Values correlated well but were off by a multiplier factor. -SGDgeneList= "../Code/SGD_features.tab" -genes = data.frame(read.delim(file=SGDgeneList,quote="",header=FALSE,colClasses = c(rep("NULL",3), rep("character", 2), rep("NULL", 11)))) -for(i in 1:length(X[,14])){ - ii= as.numeric(i) - line_num = match(X[ii,14],genes[,1],nomatch=1) - OrfRepColNum= as.numeric(match('OrfRep',names(X))) - if(X[ii,OrfRepColNum]!= "YDL227C"){ - X[ii,15] = genes[line_num,2] - } - if((X[ii,15] == "")||(X[ii,15] == "OCT1")){ - X[ii,15] = X[ii,OrfRepColNum] - } -} -Xblankreplace= X -#X= Xbu #for restore testing restore X if geneName 'Match' routine needs changing - -#Remove dAmPs ******************************* -#jlh confirmed to leave dAmps in so comment out this section -#DAmPs_List <- "../Code/22_0602_Remy_DAmPsList.txt" -#Damps <- read.delim(DAmPs_List,header=F) - -#X <- X[!(X$ORF %in% Damps$V1),] #fix this to Damps[,1] -#XafterDampsRM=X #Backup for debugging especially when Rstudio goes crazy out of control -# *********** - - -#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -#++++++END USER DATA SELECTION+++++++++++++++++++++++++++++++++++++++++++++++++ -#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -print("ln137 End of User Section including blank gene writeOver") -#++++Begin Graphics Boiler Plate Section+++++++++++++++++++++++++++++++++++++++ -#theme elements for plots -theme_Publication <- function(base_size=14, base_family="sans") { - library(grid) - library(ggthemes) - (theme_foundation(base_size=base_size, base_family=base_family) - + theme(plot.title = element_text(face = "bold", - size = rel(1.2), hjust = 0.5), - text = element_text(), - panel.background = element_rect(colour = NA), - plot.background = element_rect(colour = NA), - panel.border = element_rect(colour = NA), - axis.title = element_text(face = "bold",size = rel(1)), - axis.title.y = element_text(angle=90,vjust =2), - axis.title.x = element_text(vjust = -0.2), - axis.text = element_text(), - axis.line = element_line(colour="black"), - axis.ticks = element_line(), - panel.grid.major = element_line(colour="#f0f0f0"), - panel.grid.minor = element_blank(), - legend.key = element_rect(colour = NA), - legend.position = "bottom", - legend.direction = "horizontal", - legend.key.size= unit(0.2, "cm"), - legend.spacing = unit(0, "cm"), - legend.title = element_text(face="italic"), - plot.margin=unit(c(10,5,5,5),"mm"), - strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"), - strip.text = element_text(face="bold") - )) - -} - -scale_fill_Publication <- function(...){ - library(scales) - discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - -} - -scale_colour_Publication <- function(...){ - discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - -} - - -theme_Publication_legend_right <- function(base_size=14, base_family="sans") { - (theme_foundation(base_size=base_size, base_family=base_family) - + theme(plot.title = element_text(face = "bold", - size = rel(1.2), hjust = 0.5), - text = element_text(), - panel.background = element_rect(colour = NA), - plot.background = element_rect(colour = NA), - panel.border = element_rect(colour = NA), - axis.title = element_text(face = "bold",size = rel(1)), - axis.title.y = element_text(angle=90,vjust =2), - axis.title.x = element_text(vjust = -0.2), - axis.text = element_text(), - axis.line = element_line(colour="black"), - axis.ticks = element_line(), - panel.grid.major = element_line(colour="#f0f0f0"), - panel.grid.minor = element_blank(), - legend.key = element_rect(colour = NA), - legend.position = "right", - legend.direction = "vertical", - legend.key.size= unit(0.5, "cm"), - legend.spacing = unit(0, "cm"), - legend.title = element_text(face="italic"), - plot.margin=unit(c(10,5,5,5),"mm"), - strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"), - strip.text = element_text(face="bold") - )) - -} - -scale_fill_Publication <- function(...){ - discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - -} - -scale_colour_Publication <- function(...){ - discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - -} - - -#print timestamp for initial time the code starts -timestamp() -#+++++BEGIN QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++ -###Part 2 - Quality control -#print quality control graphs for each dataset before removing data due to contamination -#and before adjusting missing data to max theoretical values - -#plate analysis plot -#plate analysis is a quality check to identify plate effects containing anomalies - -Plate_Analysis_L <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication() - -Plate_Analysis_K <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication() - -Plate_Analysis_r <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_Publication() - -Plate_Analysis_AUC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication() - - - -Plate_Analysis_L_Box <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication() - -Plate_Analysis_K_Box <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication() - -Plate_Analysis_r_Box <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_Publication() - -Plate_Analysis_AUC_Box <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication() - -#quality control - values with a high delta background likely have heavy contamination -#check the frequency of these values -#report the L and K values of these spots -#report the number to be removed based on the Delta_Background_Tolerance -X$Delta_Backgrd <- X$LstBackgrd - X$X1stBackgrd - - -#raw l vs K before QC -Raw_l_vs_K_beforeQC <- ggplot(X,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + - ggtitle("Raw L vs K before QC") + - theme_Publication_legend_right() -pdf(paste(outputpath_QC,"Raw_L_vs_K_beforeQC.pdf",sep=""),width = 12,height = 8) -Raw_l_vs_K_beforeQC -dev.off() -pgg <- ggplotly(Raw_l_vs_K_beforeQC) -#pgg -plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_beforeQC.html",sep="") -saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - -#set delta background tolerance based on 3 sds from the mean delta background -Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(DelBGFactr*sd(X$Delta_Backgrd)) -#Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(3*sd(X$Delta_Backgrd)) -print(paste("Delta_Background_Tolerance is",Delta_Background_Tolerance,sep=" ")) - -Plate_Analysis_Delta_Backgrd <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2,position="jitter") + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_Box <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication() - - - -X_Delta_Backgrd_above_Tolerance <- X[X$Delta_Backgrd >= Delta_Background_Tolerance,] - -X_Delta_Backgrd_above_Tolerance_K_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$K,na.rm = TRUE))/2 -X_Delta_Backgrd_above_Tolerance_L_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$l,na.rm = TRUE))/2 -X_Delta_Backgrd_above_Tolerance_toRemove <- dim(X_Delta_Backgrd_above_Tolerance)[1] - -X_Delta_Backgrd_above_Tolerance_L_vs_K <- ggplot(X_Delta_Backgrd_above_Tolerance,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + - ggtitle(paste("Raw L vs K for strains above delta background threshold of",Delta_Background_Tolerance,"or above")) + - annotate("text",x=X_Delta_Backgrd_above_Tolerance_L_halfmedian,y=X_Delta_Backgrd_above_Tolerance_K_halfmedian, - label = paste("Strains above delta background tolerance = ",X_Delta_Backgrd_above_Tolerance_toRemove)) + - theme_Publication_legend_right() -pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.pdf",sep=""),width = 12,height = 8) -X_Delta_Backgrd_above_Tolerance_L_vs_K -dev.off() -pgg <- ggplotly(X_Delta_Backgrd_above_Tolerance_L_vs_K) -#pgg -plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.html",sep="") -saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - -#frequency plot for all data vs. the delta_background -DeltaBackground_Frequency_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_density() + - ggtitle("Density plot for Delta Background by Conc All Data") + theme_Publication_legend_right() - -#bar plot for all data vs. the delta_background -DeltaBackground_Bar_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_bar() + - ggtitle("Bar plot for Delta Background by Conc All Data") + theme_Publication_legend_right() - -pdf(file = paste(outputpath_QC,"Frequency_Delta_Background.pdf",sep=""),width = 12,height = 8) -print(DeltaBackground_Frequency_Plot) -print(DeltaBackground_Bar_Plot) -dev.off() - - -#Need to identify missing data, and differentiate between this data and removed data so the removed data can get set to NA and the missing data can get set to max theoretical values -#1 for missing data, 0 for non missing data -#Use "NG" for NoGrowth rather than "missing" -X$NG <- 0 -try(X[X$l == 0 & !is.na(X$l),]$NG <- 1) - -#1 for removed data, 0 non removed data -#Use DB to identify number of genes removed due to the DeltaBackground Threshold rather than "Removed" -X$DB <- 0 -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$DB <- 1) - -#replace the CPPs for l, r, AUC and K (must be last!) for removed data -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$l <- NA) -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$r <- NA) -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$AUC <- NA) -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$K <- NA) - - -Plate_Analysis_L_afterQC <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_afterQC <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_afterQC <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_afterQC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_afterQC <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - - -Plate_Analysis_L_Box_afterQC <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_Box_afterQC <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_Box_afterQC <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_Box_afterQC <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_Box_afterQC <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis.pdf",sep=""),width = 14,height=9) -Plate_Analysis_L -Plate_Analysis_L_afterQC -Plate_Analysis_K -Plate_Analysis_K_afterQC -Plate_Analysis_r -Plate_Analysis_r_afterQC -Plate_Analysis_AUC -Plate_Analysis_AUC_afterQC -Plate_Analysis_Delta_Backgrd -Plate_Analysis_Delta_Backgrd_afterQC -dev.off() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis_Boxplots.pdf",sep=""),width = 18,height=9) -Plate_Analysis_L_Box -Plate_Analysis_L_Box_afterQC -Plate_Analysis_K_Box -Plate_Analysis_K_Box_afterQC -Plate_Analysis_r_Box -Plate_Analysis_r_Box_afterQC -Plate_Analysis_AUC_Box -Plate_Analysis_AUC_Box_afterQC -Plate_Analysis_Delta_Backgrd_Box -Plate_Analysis_Delta_Backgrd_Box_afterQC -dev.off() - -#remove the zero values and print plate analysis -X_noZero <- X[which(X$l > 0),] -Plate_Analysis_L_afterQC_Z <- ggplot(X_noZero,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_afterQC_Z <- ggplot(X_noZero,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_afterQC_Z <- ggplot(X_noZero,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_afterQC_Z <- ggplot(X_noZero,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_afterQC_Z <- ggplot(X_noZero,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - - -Plate_Analysis_L_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros.pdf",sep=""),width = 14,height=9) -Plate_Analysis_L_afterQC_Z -Plate_Analysis_K_afterQC_Z -Plate_Analysis_r_afterQC_Z -Plate_Analysis_AUC_afterQC_Z -Plate_Analysis_Delta_Backgrd_afterQC_Z -dev.off() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros_Boxplots.pdf",sep=""),width = 18,height=9) -Plate_Analysis_L_Box_afterQC_Z -Plate_Analysis_K_Box_afterQC_Z -Plate_Analysis_r_Box_afterQC_Z -Plate_Analysis_AUC_Box_afterQC_Z -Plate_Analysis_Delta_Backgrd_Box_afterQC_Z -dev.off() - -#remove dataset with zeros removed -rm(X_noZero) - - -#X_test_missing_and_removed <- X[X$Removed == 1,] - -#calculate summary statistics for all strains, including both background and the deletions -X_stats_ALL <- ddply(X, c("Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_AUC = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - se_AUC = sd_AUC / sqrt(N-1) -) -#print(X_stats_ALL_L) - -write.csv(X_stats_ALL,file=paste(outputpath,"SummaryStats_ALLSTRAINS.csv"),row.names = FALSE) -#+++++END QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -##### Part 3 - Generate summary statistics and calculate the max theoretical L value -##### Calculate the Z score at each drug conc for each deletion strain - - -#get the background strains - can be modified to take another argument but for most screens will just be YDL227C -Background_Strains <- c("YDL227C") - -#first part of loop will go through for each background strain -#most cases there will only be one YDL227C -for(s in Background_Strains){ - X_Background <- X[X$OrfRep == s,] - - #if there's missing data for the background strains set these values to NA so the 0 values aren't included in summary statistics - #we may want to consider in some cases giving the max high value to L depending on the data type - if(table(X_Background$l)[1] == 0){ - X_Background[X_Background$l == 0,]$l <- NA - X_Background[X_Background$K == 0,]$K <- NA - X_Background[X_Background$r == 0,]$r <- NA - X_Background[X_Background$AUC == 0,]$AUC <- NA - } - - X_Background <- X_Background[!is.na(X_Background$l),] - - #get summary stats for L, K, R, AUC - X_stats_BY_L <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean = mean(l,na.rm=TRUE), - median = median(l,na.rm=TRUE), - max = max(l,na.rm=TRUE), - min = min(l,na.rm=TRUE), - sd = sd(l,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - print(X_stats_BY_L) - X1_SD <- max(X_stats_BY_L$sd) - - X_stats_BY_K <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(K)), - mean = mean(K,na.rm=TRUE), - median = median(K,na.rm=TRUE), - max = max(K,na.rm=TRUE), - min = min(K,na.rm=TRUE), - sd = sd(K,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - - X1_SD_K <- max(X_stats_BY_K$sd) - - - X_stats_BY_r <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = length(r), - mean = mean(r,na.rm=TRUE), - median = median(r,na.rm=TRUE), - max = max(r,na.rm=TRUE), - min = min(r,na.rm=TRUE), - sd = sd(r,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - - X1_SD_r <- max(X_stats_BY_r$sd) - - X_stats_BY_AUC <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = length(AUC), - mean = mean(AUC,na.rm=TRUE), - median = median(AUC,na.rm=TRUE), - max = max(AUC,na.rm=TRUE), - min = min(AUC,na.rm=TRUE), - sd = sd(AUC,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - - X1_SD_AUC <- max(X_stats_BY_AUC$sd) - - X_stats_BY <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_L = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - se_AUC = sd_AUC / sqrt(N-1) - ) - - write.csv(X_stats_BY,file=paste(outputpath,"SummaryStats_BackgroundStrains.csv"),row.names=FALSE) - - #calculate the max theoretical L values - #only look for max values when K is within 2SD of the ref strain - for(q in unique(X$Conc_Num_Factor)){ - if(q == 0){ - X_within_2SD_K <- X[X$Conc_Num_Factor == q,] - X_within_2SD_K <- X_within_2SD_K[!is.na(X_within_2SD_K$l),] - X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) ,] - X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - - X_outside_2SD_K <- X[X$Conc_Num_Factor == q,] - X_outside_2SD_K <- X_outside_2SD_K[!is.na(X_outside_2SD_K$l),] - #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - #X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - - } - if(q > 0){ - X_within_2SD_K_temp <- X[X$Conc_Num_Factor == q,] - X_within_2SD_K_temp <- X_within_2SD_K_temp[!is.na(X_within_2SD_K_temp$l),] - X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])),] - X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])),] - X_within_2SD_K <- rbind(X_within_2SD_K,X_within_2SD_K_temp) - - X_outside_2SD_K_temp <- X[X$Conc_Num_Factor == q,] - X_outside_2SD_K_temp <- X_outside_2SD_K_temp[!is.na(X_outside_2SD_K_temp$l),] - #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - #X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - X_outside_2SD_K <- rbind(X_outside_2SD_K,X_outside_2SD_K_temp) - } - } - - X_stats_BY_L_within_2SD_K <- ddply(X_within_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean = mean(l), - median = median(l), - max = max(l,na.rm=TRUE), - min = min(l,na.rm=TRUE), - sd = sd(l), - se = sd / sqrt(N-1), - z_max = (max-mean)/sd - ) - print(X_stats_BY_L_within_2SD_K) - X1_SD_within_2SD_K <- max(X_stats_BY_L_within_2SD_K$sd) - write.csv(X_stats_BY_L_within_2SD_K,file=paste(outputpath_QC,"Max_Observed_L_Vals_for_spots_within_2SD_K.csv",sep=""),row.names=FALSE) - - X_stats_BY_L_outside_2SD_K <- ddply(X_outside_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean = mean(l), - median = median(l), - max = max(l,na.rm=TRUE), - min = min(l,na.rm=TRUE), - sd = sd(l), - se = sd / sqrt(N-1) - ) - print(X_stats_BY_L_outside_2SD_K) - X1_SD_outside_2SD_K <- max(X_stats_BY_L_outside_2SD_K$sd) - - #X1_SD_outside_2SD_K <- X[X$l %in% X1_SD_within_2SD_K$l,] - Outside_2SD_K_L_vs_K <- ggplot(X_outside_2SD_K,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + - ggtitle("Raw L vs K for strains falling outside 2SD of the K mean at each conc") + theme_Publication_legend_right() - pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8) - print(Outside_2SD_K_L_vs_K) - dev.off() - pgg <- ggplotly(Outside_2SD_K_L_vs_K) - plotly_path <- paste(getwd(),"/",outputpath_QC,"RawL_vs_K_for_strains_outside_2SD_K.html",sep="") - saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - - Outside_2SD_K_delta_background_vs_K <- ggplot(X_outside_2SD_K,aes(Delta_Backgrd,K,color=as.factor(Conc_Num))) + geom_point(aes(l=l,ORF=ORF,Gene=Gene),shape=3,position="jitter") + - ggtitle("DeltaBackground vs K for strains falling outside 2SD of the K mean at each conc") + theme_Publication_legend_right() - pdf(paste(outputpath_QC,"DeltaBackground_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8) - Outside_2SD_K_delta_background_vs_K - dev.off() - pgg <- ggplotly(Outside_2SD_K_delta_background_vs_K) - #pgg - plotly_path <- paste(getwd(),"/",outputpath_QC,"DeltaBackground_vs_K_for_strains_outside_2SD_K.html",sep="") - saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - - #get the background strain mean values at the no drug conc to calculate shift - Background_L <- X_stats_BY_L$mean[1] - Background_K <- X_stats_BY_K$mean[1] - Background_r <- X_stats_BY_r$mean[1] - Background_AUC <- X_stats_BY_AUC$mean[1] - - #create empty plots for plotting element - p_l <- ggplot() - p_K <- ggplot() - p_r <- ggplot() - p_AUC <- ggplot() - - p_rf_l <- ggplot() - p_rf_K <- ggplot() - p_rf_r <- ggplot() - p_rf_AUC <- ggplot() - - #get only the deletion strains - X2 <- X - X2 <- X2[X2$OrfRep != "YDL227C",] - - #if set to max theoretical value, add a 1 to SM, if not, leave as 0 - #SM = Set to Max - X2$SM <- 0 - #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min - for(i in 1:length(unique(X2$Conc_Num))){ - Concentration <- unique(X2$Conc_Num)[i] - X2_temp <- X2[X2$Conc_Num == Concentration,] - if(Concentration == 0){ - X2_new <- X2_temp - print(paste("Check loop order, conc =",Concentration,sep=" ")) - } - if(Concentration > 0){ - try(X2_temp[X2_temp$l == 0 & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$SM <- 1) - try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - - #X2_temp[X2_temp$K == 0,]$K <- X_stats_ALL_K$max[i] - #X2_temp[X2_temp$r == 0,]$r <- X_stats_ALL_r$max[i] - #X2_temp[X2_temp$AUC == 0,]$AUC <- X_stats_ALL_AUC$max[i] - print(paste("Check loop order, conc =",Concentration,sep=" ")) - - X2_new <- rbind(X2_new,X2_temp) - - } - } - X2 <- X2_new - - - #get only the RF strains - X2_RF <- X - X2_RF <- X2_RF[X2_RF$OrfRep == "YDL227C",] - #if set to max theoretical value, add a 1 to SM, if not, leave as 0 - #SM = Set to Max - X2_RF$SM <- 0 - #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min - for(i in 1:length(unique(X2_RF$Conc_Num))){ - Concentration <- unique(X2_RF$Conc_Num)[i] - X2_RF_temp <- X2_RF[X2_RF$Conc_Num == Concentration,] - if(Concentration == 0){ - X2_RF_new <- X2_RF_temp - print(paste("Check loop order, conc =",Concentration,sep=" ")) - } - if(Concentration > 0){ - try(X2_RF_temp[X2_RF_temp$l == 0 & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$SM <- 1) - try(X2_RF_temp[X2_RF_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - print(paste("Check loop order, if error, refs have no L values outside theoretical max L, for REFs, conc =",Concentration,sep=" ")) - - X2_RF_new <- rbind(X2_RF_new,X2_RF_temp) - - } - } - X2_RF <- X2_RF_new - - - #######Part 4 Get the RF Z score values - #Change the OrfRep Column to include the RF strain, the Gene name and the Num. so each RF gets its own score - X2_RF$OrfRep <- paste(X2_RF$OrfRep,X2_RF$Gene,X2_RF$Num.,sep="_") - - num_genes_RF <- length(unique(X2_RF$OrfRep)) - print(num_genes_RF) - - - #create the output data.frame containing columns for each RF strain - InteractionScores_RF <- unique(X2_RF["OrfRep"]) - #InteractionScores_RF$Gene <- unique(X2$Gene) - InteractionScores_RF$Gene <- NA - InteractionScores_RF$Raw_Shift_L <- NA - InteractionScores_RF$Z_Shift_L <- NA - InteractionScores_RF$lm_Score_L <- NA - InteractionScores_RF$Z_lm_L <- NA - InteractionScores_RF$R_Squared_L <- NA - InteractionScores_RF$Sum_Z_Score_L <- NA - InteractionScores_RF$Avg_Zscore_L <- NA - InteractionScores_RF$Raw_Shift_K <- NA - InteractionScores_RF$Z_Shift_K <- NA - InteractionScores_RF$lm_Score_K <- NA - InteractionScores_RF$Z_lm_K <- NA - InteractionScores_RF$R_Squared_K <- NA - InteractionScores_RF$Sum_Z_Score_K <- NA - InteractionScores_RF$Avg_Zscore_K <- NA - InteractionScores_RF$Raw_Shift_r <- NA - InteractionScores_RF$Z_Shift_r <- NA - InteractionScores_RF$lm_Score_r <- NA - InteractionScores_RF$Z_lm_r <- NA - InteractionScores_RF$R_Squared_r <- NA - InteractionScores_RF$Sum_Z_Score_r <- NA - InteractionScores_RF$Avg_Zscore_r <- NA - InteractionScores_RF$Raw_Shift_AUC <- NA - InteractionScores_RF$Z_Shift_AUC <- NA - InteractionScores_RF$lm_Score_AUC <- NA - InteractionScores_RF$Z_lm_AUC <- NA - InteractionScores_RF$R_Squared_AUC <- NA - InteractionScores_RF$Sum_Z_Score_AUC <- NA - InteractionScores_RF$Avg_Zscore_AUC <- NA - InteractionScores_RF$NG <- NA - InteractionScores_RF$SM <- NA - - - for(i in 1:num_genes_RF){ - #get each deletion strain ORF - Gene_Sel <- unique(X2_RF$OrfRep)[i] - #extract only the current deletion strain and its data - X_Gene_Sel <- X2_RF[X2_RF$OrfRep == Gene_Sel,] - - X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm = TRUE), - median_L = median(l,na.rm = TRUE), - sd_L = sd(l,na.rm = TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm = TRUE), - median_K = median(K,na.rm = TRUE), - sd_K = sd(K,na.rm = TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm = TRUE), - median_r = median(r,na.rm = TRUE), - sd_r = sd(r,na.rm = TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm = TRUE), - median_AUC = median(AUC,na.rm = TRUE), - sd_AUC = sd(AUC,na.rm = TRUE), - se_AUC = sd_AUC / sqrt(N-1), - NG = sum(NG,na.rm=TRUE), - DB= sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - #Get shift vals - #if L is 0, that means the no growth on no drug - #if L is NA at 0, that means the spot was removed due to contamination - #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift - #otherwise calculate shift at no drug conc - if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){ - X_stats_interaction$Raw_Shift_L <- 0 - X_stats_interaction$Raw_Shift_K <- 0 - X_stats_interaction$Raw_Shift_r <- 0 - X_stats_interaction$Raw_Shift_AUC <- 0 - X_stats_interaction$Z_Shift_L <- 0 - X_stats_interaction$Z_Shift_K <- 0 - X_stats_interaction$Z_Shift_r <- 0 - X_stats_interaction$Z_Shift_AUC <- 0 - }else{ - X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L - X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K - X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r - X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC - X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1] - X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1] - X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1] - X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1] - } - - - #get WT vals - X_stats_interaction$WT_l <- X_stats_BY_L$mean - X_stats_interaction$WT_K <- X_stats_BY_K$mean - X_stats_interaction$WT_r <- X_stats_BY_r$mean - X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean - - #Get WT SD - X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd - X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd - X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd - X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd - - - #only get scores if there's growth at no drug - if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for no growth values in Z score calculation - if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #get linear model - gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction) - - #get interaction score calculated by linear model and R-squared value for the fit - gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] - r_squared_l <- summary(gene_lm_L)$r.squared - gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] - r_squared_K <- summary(gene_lm_K)$r.squared - gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] - r_squared_r <- summary(gene_lm_r)$r.squared - gene_interaction_AUC <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_AUC$coefficients[1] - r_squared_AUC <- summary(gene_lm_AUC)$r.squared - - #Get total of non removed values - Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1 - - #report the scores - InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] - InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] - - InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L - InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc) - - - InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K - InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc) - - InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r - InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1) - - InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC - InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1) - } - - if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for missing values in Z score calculatiom - if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #NA values for the next part since there's an NA or 0 at the no drug. - gene_lm_L <- NA - gene_lm_K <- NA - gene_lm_r <- NA - gene_lm_AUC <- NA - - gene_interaction_L <- NA - r_squared_l <- NA - gene_interaction_K <- NA - r_squared_K <- NA - gene_interaction_r <- NA - r_squared_r <- NA - gene_interaction_AUC <- NA - r_squared_AUC <- NA - - X_stats_interaction$Raw_Shift_L <- NA - X_stats_interaction$Raw_Shift_K <- NA - X_stats_interaction$Raw_Shift_r <- NA - X_stats_interaction$Raw_Shift_AUC <- NA - - X_stats_interaction$Z_Shift_L <- NA - X_stats_interaction$Z_Shift_K <- NA - X_stats_interaction$Z_Shift_r <- NA - X_stats_interaction$Z_Shift_AUC <- NA - - InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] - InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] - - InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L - InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - - InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K - InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - - InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r - InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - - InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC - InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - } - - if(i == 1){ - X_stats_interaction_ALL_RF <- X_stats_interaction - } - if(i > 1){ - X_stats_interaction_ALL_RF <- rbind(X_stats_interaction_ALL_RF,X_stats_interaction) - } - - InteractionScores_RF$NG[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE) - InteractionScores_RF$DB[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE) - InteractionScores_RF$SM[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE) - - #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int) - - - } - print("Pass RF Calculation loop") - - lm_sd_L <- sd(InteractionScores_RF$lm_Score_L,na.rm=TRUE) - lm_sd_K <- sd(InteractionScores_RF$lm_Score_K,na.rm=TRUE) - lm_sd_r <- sd(InteractionScores_RF$lm_Score_r,na.rm=TRUE) - lm_sd_AUC <- sd(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) - - lm_mean_L <- mean(InteractionScores_RF$lm_Score_L,na.rm=TRUE) - lm_mean_K <- mean(InteractionScores_RF$lm_Score_K,na.rm=TRUE) - lm_mean_r <- mean(InteractionScores_RF$lm_Score_r,na.rm=TRUE) - lm_mean_AUC <- mean(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) - - print(paste("Mean RF linear regression score L",lm_mean_L)) - - - InteractionScores_RF$Z_lm_L <- (InteractionScores_RF$lm_Score_L - lm_mean_L)/(lm_sd_L) - InteractionScores_RF$Z_lm_K <- (InteractionScores_RF$lm_Score_K - lm_mean_K)/(lm_sd_K) - InteractionScores_RF$Z_lm_r <- (InteractionScores_RF$lm_Score_r - lm_mean_r)/(lm_sd_r) - InteractionScores_RF$Z_lm_AUC <- (InteractionScores_RF$lm_Score_AUC - lm_mean_AUC)/(lm_sd_AUC) - - - InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$Z_lm_L,decreasing=TRUE),] - InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$NG,decreasing=TRUE),] - write.csv(InteractionScores_RF,paste(outputpath,"RF_ZScores_Interaction.csv",sep=""),row.names=FALSE) - - - for(i in 1:num_genes_RF){ - Gene_Sel <- unique(InteractionScores_RF$OrfRep)[i] - X_ZCalculations <- X_stats_interaction_ALL_RF[X_stats_interaction_ALL_RF$OrfRep == Gene_Sel,] - X_Int_Scores <- InteractionScores_RF[InteractionScores_RF$OrfRep == Gene_Sel,] - - p_rf_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg Zscore =",round(X_Int_Scores$Avg_Zscore_L,2))) + - annotate("text",x=1,y=25,label = paste("lm Zscore =",round(X_Int_Scores$Z_lm_L,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_rf_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) + - annotate("text",x=1,y=25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_L,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_rf_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-0.65,0.65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) + - annotate("text",x=1,y=0.25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_r,2))) + - annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) + - theme_Publication() - - p_rf_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-6500,6500)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) + - annotate("text",x=1,y=2500,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_AUC,2))) + - annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) + - theme_Publication() - - - - if(i == 1){ - X_stats_interaction_ALL_RF_final <- X_ZCalculations - } - if(i > 1){ - X_stats_interaction_ALL_RF_final <- rbind(X_stats_interaction_ALL_RF_final,X_ZCalculations) - } - } - print("Pass RF ggplot loop") - write.csv(X_stats_interaction_ALL_RF_final,paste(outputpath,"RF_ZScore_Calculations.csv",sep=""),row.names = FALSE) - - - ####### Part 5 - Get Zscores for Gene deletion strains - - - - #get total number of genes for the next loop - num_genes <- length(unique(X2$OrfRep)) - print(num_genes) - - #create the output data.frame containing columns for each deletion strain - InteractionScores <- unique(X2["OrfRep"]) - #InteractionScores$Gene <- unique(X2$Gene) - InteractionScores$Gene <- NA - InteractionScores$Raw_Shift_L <- NA - InteractionScores$Z_Shift_L <- NA - InteractionScores$lm_Score_L <- NA - InteractionScores$Z_lm_L <- NA - InteractionScores$R_Squared_L <- NA - InteractionScores$Sum_Z_Score_L <- NA - InteractionScores$Avg_Zscore_L <- NA - InteractionScores$Raw_Shift_K <- NA - InteractionScores$Z_Shift_K <- NA - InteractionScores$lm_Score_K <- NA - InteractionScores$Z_lm_K <- NA - InteractionScores$R_Squared_K <- NA - InteractionScores$Sum_Z_Score_K <- NA - InteractionScores$Avg_Zscore_K <- NA - InteractionScores$Raw_Shift_r <- NA - InteractionScores$Z_Shift_r <- NA - InteractionScores$lm_Score_r <- NA - InteractionScores$Z_lm_r <- NA - InteractionScores$R_Squared_r <- NA - InteractionScores$Sum_Z_Score_r <- NA - InteractionScores$Avg_Zscore_r <- NA - InteractionScores$Raw_Shift_AUC <- NA - InteractionScores$Z_Shift_AUC <- NA - InteractionScores$lm_Score_AUC <- NA - InteractionScores$Z_lm_AUC <- NA - InteractionScores$R_Squared_AUC <- NA - InteractionScores$Sum_Z_Score_AUC <- NA - InteractionScores$Avg_Zscore_AUC <- NA - InteractionScores$NG <- NA - InteractionScores$DB <- NA - InteractionScores$SM <- NA - - for(i in 1:num_genes){ - #get each deletion strain ORF - Gene_Sel <- unique(X2$OrfRep)[i] - #extract only the current deletion strain and its data - X_Gene_Sel <- X2[X2$OrfRep == Gene_Sel,] - - X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm = TRUE), - median_L = median(l,na.rm = TRUE), - sd_L = sd(l,na.rm = TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm = TRUE), - median_K = median(K,na.rm = TRUE), - sd_K = sd(K,na.rm = TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm = TRUE), - median_r = median(r,na.rm = TRUE), - sd_r = sd(r,na.rm = TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm = TRUE), - median_AUC = median(AUC,na.rm = TRUE), - sd_AUC = sd(AUC,na.rm = TRUE), - se_AUC = sd_AUC / sqrt(N-1), - NG = sum(NG,na.rm=TRUE), - DB= sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - #Get shift vals - #if L is 0, that means the no growth on no drug - #if L is NA at 0, that means the spot was removed due to contamination - #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift - #otherwise calculate shift at no drug conc - if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){ - X_stats_interaction$Raw_Shift_L <- 0 - X_stats_interaction$Raw_Shift_K <- 0 - X_stats_interaction$Raw_Shift_r <- 0 - X_stats_interaction$Raw_Shift_AUC <- 0 - X_stats_interaction$Z_Shift_L <- 0 - X_stats_interaction$Z_Shift_K <- 0 - X_stats_interaction$Z_Shift_r <- 0 - X_stats_interaction$Z_Shift_AUC <- 0 - }else{ - X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L - X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K - X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r - X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC - X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1] - X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1] - X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1] - X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1] - } - - - #get WT vals - X_stats_interaction$WT_l <- X_stats_BY_L$mean - X_stats_interaction$WT_K <- X_stats_BY_K$mean - X_stats_interaction$WT_r <- X_stats_BY_r$mean - X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean - - #Get WT SD - X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd - X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd - X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd - X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd - - - #only get scores if there's growth at no drug - if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for no growth values in Z score calculation - if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #get linear model - gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction) - - #get interaction score calculated by linear model and R-squared value for the fit - gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] - r_squared_l <- summary(gene_lm_L)$r.squared - gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] - r_squared_K <- summary(gene_lm_K)$r.squared - gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] - r_squared_r <- summary(gene_lm_r)$r.squared - gene_interaction_AUC <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_AUC$coefficients[1] - r_squared_AUC <- summary(gene_lm_AUC)$r.squared - - #Get total of non removed values - Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1 - - #report the scores - InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1]) - InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1]) - - InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_L - InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_L-lm_mean_L)/lm_sd_L - InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE) - InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc) - - - InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_K - InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_K-lm_mean_K)/lm_sd_K - InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE) - InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc) - - InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_r - InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_r-lm_mean_r)/lm_sd_r - InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE) - InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1) - - InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_AUC - InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_AUC-lm_mean_AUC)/lm_sd_AUC - InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE) - InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1) - } - - if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for missing values in Z score calculatiom - if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #NA values for the next part since there's an NA or 0 at the no drug. - gene_lm_L <- NA - gene_lm_K <- NA - gene_lm_r <- NA - - gene_interaction_L <- NA - r_squared_l <- NA - gene_interaction_K <- NA - r_squared_K <- NA - gene_interaction_r <- NA - r_squared_r <- NA - - X_stats_interaction$Raw_Shift_L <- NA - X_stats_interaction$Raw_Shift_K <- NA - X_stats_interaction$Raw_Shift_r <- NA - X_stats_interaction$Raw_Shift_AUC <- NA - - X_stats_interaction$Z_Shift_L <- NA - X_stats_interaction$Z_Shift_K <- NA - X_stats_interaction$Z_Shift_r <- NA - X_stats_interaction$Z_Shift_AUC <- NA - - InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1]) - InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1]) - - InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- NA - - InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <-NA - InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- NA - - InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- NA - - InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - } - - if(i == 1){ - X_stats_interaction_ALL <- X_stats_interaction - } - if(i > 1){ - X_stats_interaction_ALL <- rbind(X_stats_interaction_ALL,X_stats_interaction) - } - - InteractionScores$NG[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE) - InteractionScores$DB[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE) - InteractionScores$SM[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE) - - #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int) - - - } - print("Pass Int Calculation loop") - InteractionScores <- InteractionScores[order(InteractionScores$Z_lm_L,decreasing=TRUE),] - InteractionScores <- InteractionScores[order(InteractionScores$NG,decreasing=TRUE),] - df_order_by_OrfRep <- unique(InteractionScores$OrfRep) - #X_stats_interaction_ALL <- X_stats_interaction_ALL[order(X_stats_interaction_ALL$NG,decreasing=TRUE),] - write.csv(InteractionScores,paste(outputpath,"ZScores_Interaction.csv",sep=""),row.names=FALSE) - - InteractionScores_deletion_enhancers_L <- InteractionScores[InteractionScores$Avg_Zscore_L >= 2,] - InteractionScores_deletion_enhancers_K <- InteractionScores[InteractionScores$Avg_Zscore_K <= -2,] - InteractionScores_deletion_suppressors_L <- InteractionScores[InteractionScores$Avg_Zscore_L <= -2,] - InteractionScores_deletion_suppressors_K <- InteractionScores[InteractionScores$Avg_Zscore_K >= 2,] - InteractionScores_deletion_enhancers_and_Suppressors_L <- InteractionScores[InteractionScores$Avg_Zscore_L >= 2 | InteractionScores$Avg_Zscore_L <= -2,] - InteractionScores_deletion_enhancers_and_Suppressors_K <- InteractionScores[InteractionScores$Avg_Zscore_K >= 2 | InteractionScores$Avg_Zscore_K <= -2,] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- InteractionScores[InteractionScores$Z_lm_L >= 2 & InteractionScores$Avg_Zscore_L <= -2,] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- InteractionScores[InteractionScores$Z_lm_L <= -2 & InteractionScores$Avg_Zscore_L >= 2,] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- InteractionScores[InteractionScores$Z_lm_K <= -2 & InteractionScores$Avg_Zscore_K >= 2,] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- InteractionScores[InteractionScores$Z_lm_K >= 2 & InteractionScores$Avg_Zscore_K <= -2,] - - InteractionScores_deletion_enhancers_L <- InteractionScores_deletion_enhancers_L[!is.na(InteractionScores_deletion_enhancers_L$OrfRep),] - InteractionScores_deletion_enhancers_K <- InteractionScores_deletion_enhancers_K[!is.na(InteractionScores_deletion_enhancers_K$OrfRep),] - InteractionScores_deletion_suppressors_L <- InteractionScores_deletion_suppressors_L[!is.na(InteractionScores_deletion_suppressors_L$OrfRep),] - InteractionScores_deletion_suppressors_K <- InteractionScores_deletion_suppressors_K[!is.na(InteractionScores_deletion_suppressors_K$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_L <- InteractionScores_deletion_enhancers_and_Suppressors_L[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_K <- InteractionScores_deletion_enhancers_and_Suppressors_K[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K$OrfRep),] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L[!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L$OrfRep),] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L[!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L$OrfRep),] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K[!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K$OrfRep),] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K[!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K$OrfRep),] - - write.csv(InteractionScores_deletion_enhancers_L,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_K,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_L,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_K,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L,paste(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L,paste(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K,paste(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K,paste(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv",sep=""),row.names=FALSE) - - #get enhancers and suppressors for linear regression - InteractionScores_deletion_enhancers_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2,] - InteractionScores_deletion_enhancers_K_lm <- InteractionScores[InteractionScores$Z_lm_K <= -2,] - InteractionScores_deletion_suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L <= -2,] - InteractionScores_deletion_suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2,] - InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2 | InteractionScores$Z_lm_L <= -2,] - InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2 | InteractionScores$Z_lm_K <= -2,] - - InteractionScores_deletion_enhancers_L_lm <- InteractionScores_deletion_enhancers_L_lm[!is.na(InteractionScores_deletion_enhancers_L_lm$OrfRep),] - InteractionScores_deletion_enhancers_K_lm <- InteractionScores_deletion_enhancers_K_lm[!is.na(InteractionScores_deletion_enhancers_K_lm$OrfRep),] - InteractionScores_deletion_suppressors_L_lm <- InteractionScores_deletion_suppressors_L_lm[!is.na(InteractionScores_deletion_suppressors_L_lm$OrfRep),] - InteractionScores_deletion_suppressors_K_lm <- InteractionScores_deletion_suppressors_K_lm[!is.na(InteractionScores_deletion_suppressors_K_lm$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores_deletion_enhancers_and_Suppressors_L_lm[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L_lm$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores_deletion_enhancers_and_Suppressors_K_lm[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K_lm$OrfRep),] - - write.csv(InteractionScores_deletion_enhancers_L_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_L_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_K_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_K_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_L_lm,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_L_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_K_lm,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_K_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv",sep=""),row.names=FALSE) - - - write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) - print('ln 1570 write StudyInfo.csv after ') - #write.table(Labels,file=paste("../Code/StudyInfo.txt"),sep="\t",row.names = FALSE) - - for(i in 1:num_genes){ - Gene_Sel <- unique(InteractionScores$OrfRep)[i] - X_ZCalculations <- X_stats_interaction_ALL[X_stats_interaction_ALL$OrfRep == Gene_Sel,] - X_Int_Scores <- InteractionScores[InteractionScores$OrfRep == Gene_Sel,] - - p_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_L,2))) + - annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_L,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) + - annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_K,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-0.65,0.65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) + - annotate("text",x=1,y=0.25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_r,2))) + - annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) + - theme_Publication() - - p_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-6500,6500)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) + - annotate("text",x=1,y=2500,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_AUC,2))) + - annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) + - theme_Publication() - - - - if(i == 1){ - X_stats_interaction_ALL_final <- X_ZCalculations - } - if(i > 1){ - X_stats_interaction_ALL_final <- rbind(X_stats_interaction_ALL_final,X_ZCalculations) - } - } - print("Pass Int ggplot loop") - write.csv(X_stats_interaction_ALL_final,paste(outputpath,"ZScore_Calculations.csv",sep=""),row.names = FALSE) - - - - - - - Blank <- ggplot(X2_RF) + geom_blank() - - pdf(paste(outputpath,"InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE) - - X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise, - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_AUC = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - NG = sum(NG,na.rm=TRUE), - DB = sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - L_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,l)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + - annotate("text",x=-0.25,y=10,label="NG") + - annotate("text",x=-0.25,y=5,label="DB") + - annotate("text",x=-0.25,y=0,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=0,label=X_stats_X2_RF$SM) + - theme_Publication() - - K_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,K)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(-20,160)) + - annotate("text",x=-0.25,y=-5,label="NG") + - annotate("text",x=-0.25,y=-12.5,label="DB") + - annotate("text",x=-0.25,y=-20,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-5,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-12.5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-20,label=X_stats_X2_RF$SM) + - theme_Publication() - - R_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,r)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - annotate("text",x=-0.25,y=.9,label="NG") + - annotate("text",x=-0.25,y=.8,label="DB") + - annotate("text",x=-0.25,y=.7,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.9,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.8,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.7,label=X_stats_X2_RF$SM) + - theme_Publication() - - AUC_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,AUC)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + - annotate("text",x=-0.25,y=11000,label="NG") + - annotate("text",x=-0.25,y=10000,label="DB") + - annotate("text",x=-0.25,y=9000,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=11000,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10000,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=9000,label=X_stats_X2_RF$SM) + - theme_Publication() - - - L_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + - theme_Publication() - - K_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + - theme_Publication() - - r_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - theme_Publication() - - AUC_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + - theme_Publication() - - - grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2) - grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2) - - - - #plot the references - #grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4) - #grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2) - - #loop for grid arrange 4x3 - j <- rep(1,((num_genes)/3)-1) - for(n in 1:length(j)){ - j[n+1] <- n*3 + 1 - } - #loop for printing each plot - num <- 0 - for(m in 1:(round((num_genes)/3)-1)){ - num <- j[m] - grid.arrange(p_l[[num]],p_K[[num]],p_r[[num]],p_AUC[[num]],p_l[[num+1]],p_K[[num+1]],p_r[[num+1]],p_AUC[[num+1]],p_l[[num+2]],p_K[[num+2]],p_r[[num+2]],p_AUC[[num+2]],ncol=4,nrow=3) - #grid.arrange(p_l[[364]],p_K[[364]],p_r[[364]],p_l[[365]],p_K[[365]],p_r[[365]],p_l[[366]],p_K[[366]],p_r[[366]],ncol=3,nrow=3) - - - #p1[[num+3]],p_K[[num+3]],p_r[[num+3]],p1[[num+4]],p_K[[num+4]],p_r[[num+4]] - } - if(num_genes != (num+2)){ - total_num = num_genes - (num+2) - if(total_num == 5){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_AUC[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],p_AUC[[num+7]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - if(total_num == 4){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_AUC[[num+6]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - - if(total_num == 3){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3) - #grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 2){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 1){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - } - dev.off() - - - - pdf(paste(outputpath,"RF_InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE) - - X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise, - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_AUC = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - NG = sum(NG,na.rm=TRUE), - DB = sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - L_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,l)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + - annotate("text",x=-0.25,y=10,label="NG") + - annotate("text",x=-0.25,y=5,label="DB") + - annotate("text",x=-0.25,y=0,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=0,label=X_stats_X2_RF$SM) + - theme_Publication() - - K_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,K)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(-20,160)) + - annotate("text",x=-0.25,y=-5,label="NG") + - annotate("text",x=-0.25,y=-12.5,label="DB") + - annotate("text",x=-0.25,y=-20,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-5,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-12.5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-20,label=X_stats_X2_RF$SM) + - theme_Publication() - - R_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,r)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - annotate("text",x=-0.25,y=.9,label="NG") + - annotate("text",x=-0.25,y=.8,label="DB") + - annotate("text",x=-0.25,y=.7,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.9,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.8,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.7,label=X_stats_X2_RF$SM) + - theme_Publication() - - AUC_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,AUC)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + - annotate("text",x=-0.25,y=11000,label="NG") + - annotate("text",x=-0.25,y=10000,label="DB") + - annotate("text",x=-0.25,y=9000,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=11000,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10000,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=9000,label=X_stats_X2_RF$SM) + - theme_Publication() - - - L_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + - theme_Publication() - - K_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + - theme_Publication() - - r_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - theme_Publication() - - AUC_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(12000,0)) + - theme_Publication() - - - grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2) - grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2) - - - - #plot the references - #grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4) - #grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2) - - #loop for grid arrange 4x3 - j <- rep(1,((num_genes_RF)/3)-1) - for(n in 1:length(j)){ - j[n+1] <- n*3 + 1 - } - #loop for printing each plot - num <- 0 - for(m in 1:(round((num_genes_RF)/3)-1)){ - num <- j[m] - grid.arrange(p_rf_l[[num]],p_rf_K[[num]],p_rf_r[[num]],p_rf_AUC[[num]],p_rf_l[[num+1]],p_rf_K[[num+1]],p_rf_r[[num+1]],p_rf_AUC[[num+1]],p_rf_l[[num+2]],p_rf_K[[num+2]],p_rf_r[[num+2]],p_rf_AUC[[num+2]],ncol=4,nrow=3) - #grid.arrange(p_rf_l[[364]],p_rf_K[[364]],p_rf_r[[364]],p_rf_l[[365]],p_rf_K[[365]],p_rf_r[[365]],p_rf_l[[366]],p_rf_K[[366]],p_rf_r[[366]],ncol=3,nrow=3) - - - #p1[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p1[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]] - } - if(num_genes_RF != (num+2)){ - total_num = num_genes_RF - (num+2) - if(total_num == 5){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_AUC[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],p_rf_AUC[[num+7]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - if(total_num == 4){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_AUC[[num+6]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - - if(total_num == 3){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3) - #grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 2){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 1){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - } - dev.off() - - #print rank plots for L and K gene interactions - - - InteractionScores_AdjustMissing <- InteractionScores - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_L),]$Avg_Zscore_L <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_K),]$Avg_Zscore_K <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_r),]$Avg_Zscore_r <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_AUC),]$Avg_Zscore_AUC <- 0.001 - - InteractionScores_AdjustMissing$L_Rank <- NA - InteractionScores_AdjustMissing$K_Rank <- NA - InteractionScores_AdjustMissing$r_Rank <- NA - InteractionScores_AdjustMissing$AUC_Rank <- NA - - InteractionScores_AdjustMissing$L_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_L) - InteractionScores_AdjustMissing$K_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_K) - InteractionScores_AdjustMissing$r_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_r) - InteractionScores_AdjustMissing$AUC_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_AUC) - - # - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_L),]$Z_lm_L <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_K),]$Z_lm_K <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_r),]$Z_lm_r <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_AUC),]$Z_lm_AUC <- 0.001 - - InteractionScores_AdjustMissing$L_Rank_lm <- NA - InteractionScores_AdjustMissing$K_Rank_lm <- NA - InteractionScores_AdjustMissing$r_Rank_lm <- NA - InteractionScores_AdjustMissing$AUC_Rank_lm <- NA - - InteractionScores_AdjustMissing$L_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_L) - InteractionScores_AdjustMissing$K_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_K) - InteractionScores_AdjustMissing$r_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_r) - InteractionScores_AdjustMissing$AUC_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_AUC) - - - - Rank_L_1SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - pdf(paste(outputpath,"RankPlots.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2) - - dev.off() - - - Rank_L_1SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - pdf(paste(outputpath,"RankPlots_lm.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2) - - dev.off() - - - - X_NArm <- InteractionScores[!is.na(InteractionScores$Z_lm_L) | !is.na(InteractionScores$Avg_Zscore_L) ,] - - #find overlaps - X_NArm$Overlap <- "No Effect" - try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Enhancer Both") - try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Suppressor Both") - try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= 2,]$Overlap <- "Deletion Enhancer lm only") - try(X_NArm[X_NArm$Z_lm_L <= 2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Enhancer Avg Zscore only") - try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= -2,]$Overlap <- "Deletion Suppressor lm only") - try(X_NArm[X_NArm$Z_lm_L >= -2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Suppressor Avg Zscore only") - try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Enhancer lm, Deletion Suppressor Avg Z score") - try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Suppressor lm, Deletion Enhancer Avg Z score") - - #get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 - get_lm_L <- lm(X_NArm$Z_lm_L~X_NArm$Avg_Zscore_L) - L_lm <- summary(get_lm_L) - - get_lm_K <- lm(X_NArm$Z_lm_K~X_NArm$Avg_Zscore_K) - K_lm <- summary(get_lm_K) - - get_lm_r <- lm(X_NArm$Z_lm_r~X_NArm$Avg_Zscore_r) - r_lm <- summary(get_lm_r) - - get_lm_AUC <- lm(X_NArm$Z_lm_AUC~X_NArm$Avg_Zscore_AUC) - AUC_lm <- summary(get_lm_AUC) - - pdf(paste(outputpath,"Avg_Zscore_vs_lm_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) - - print(ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm L") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(Avg_Zscore_K,Z_lm_K)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm K") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(K_lm$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(Avg_Zscore_r,Z_lm_r)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm r") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(r_lm$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(Avg_Zscore_AUC,Z_lm_AUC)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm AUC") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(AUC_lm$r.squared,2))) + theme_Publication_legend_right()) - - dev.off() - - - lm_v_Zscore_L <- ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L,ORF=OrfRep,Gene=Gene,NG=NG,SM=SM,DB=DB)) + geom_point(aes(color=Overlap),shape=3) + - geom_smooth(method = "lm",color=1) + ggtitle("Avg Zscore vs lm L") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right() - - pgg <- ggplotly(lm_v_Zscore_L) - #pgg - plotly_path <- paste(getwd(),"/",outputpath,"Avg_Zscore_vs_lm_NA_rm.html",sep="") - saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - X_NArm$L_Rank <- rank(X_NArm$Avg_Zscore_L) - X_NArm$K_Rank <- rank(X_NArm$Avg_Zscore_K) - X_NArm$r_Rank <- rank(X_NArm$Avg_Zscore_r) - X_NArm$AUC_Rank <- rank(X_NArm$Avg_Zscore_AUC) - - X_NArm$L_Rank_lm <- rank(X_NArm$Z_lm_L) - X_NArm$K_Rank_lm <- rank(X_NArm$Z_lm_K) - X_NArm$r_Rank_lm <- rank(X_NArm$Z_lm_r) - X_NArm$AUC_Rank_lm <- rank(X_NArm$Z_lm_AUC) - - #get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 - get_lm_L2 <- lm(X_NArm$L_Rank_lm~X_NArm$L_Rank) - L_lm2 <- summary(get_lm_L2) - - get_lm_K2 <- lm(X_NArm$K_Rank_lm~X_NArm$K_Rank) - K_lm2 <- summary(get_lm_K2) - - get_lm_r2 <- lm(X_NArm$r_Rank_lm~X_NArm$r_Rank) - r_lm2 <- summary(get_lm_r2) - - get_lm_AUC2 <- lm(X_NArm$AUC_Rank_lm~X_NArm$AUC_Rank) - AUC_lm2 <- summary(get_lm_AUC2) - - num_genes_NArm2 <- (dim(X_NArm)[1])/2 - - pdf(paste(outputpath,"Avg_Zscore_vs_lm_ranked_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) - - print(ggplot(X_NArm,aes(L_Rank,L_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank Avg Zscore vs lm L") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(L_lm2$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(K_Rank,K_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank Avg Zscore vs lm K") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(K_lm2$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(r_Rank,r_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank Avg Zscore vs lm r") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(r_lm2$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(AUC_Rank,AUC_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank of Avg Zscore vs lm AUC") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(AUC_lm2$r.squared,2))) + theme_Publication_legend_right()) - - - - dev.off() - - - - Rank_L_1SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - pdf(paste(outputpath,"RankPlots_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2) - - dev.off() - - - Rank_L_1SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - pdf(paste(outputpath,"RankPlots_lm_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2) - - dev.off() - -} - - - -#get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 -get_lm_1 <- lm(X_NArm$Z_lm_K~X_NArm$Z_lm_L) -L_lm_1 <- summary(get_lm_1) - -get_lm_2 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_L) -L_lm_2 <- summary(get_lm_2) - -get_lm_3 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_L) -L_lm_3 <- summary(get_lm_3) - -get_lm_4 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_K) -L_lm_4 <- summary(get_lm_4) - -get_lm_5 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_K) -L_lm_5 <- summary(get_lm_5) - -get_lm_6 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_r) -L_lm_6 <- summary(get_lm_6) - - -pdf(file=paste(outputpath,"Correlation_CPPs.pdf",sep=""),width = 10, height = 7, onefile = TRUE) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_K)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction L vs. Interaction K") + - xlab("z-score L") + ylab("z-score K") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_1$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction L vs. Interaction r") + - xlab("z-score L") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_2$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction L vs. Interaction AUC") + - xlab("z-score L") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_3$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction K vs. Interaction r") + - xlab("z-score K") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_4$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction K vs. Interaction AUC") + - xlab("z-score K") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_5$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_r,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction r vs. Interaction AUC") + - xlab("z-score r") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_6$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -InteractionScores_RF2 <- InteractionScores_RF[!is.na(InteractionScores_RF$Z_lm_L),] -ggplot(X_NArm,aes(Z_lm_L,Z_lm_K)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_K),color="cyan") + - ggtitle("Interaction L vs. Interaction K") + - xlab("z-score L") + ylab("z-score K") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_1$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_r),color="cyan") + - ggtitle("Interaction L vs. Interaction r") + - xlab("z-score L") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_2$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_AUC),color="cyan") + - ggtitle("Interaction L vs. Interaction AUC") + - xlab("z-score L") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_3$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_K,Z_lm_r),color="cyan") + - ggtitle("Interaction K vs. Interaction r") + - xlab("z-score K") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_4$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_K,Z_lm_AUC),color="cyan") + - ggtitle("Interaction K vs. Interaction AUC") + - xlab("z-score K") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_5$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_r,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_r,Z_lm_AUC),color="cyan") + - ggtitle("Interaction r vs. Interaction AUC") + - xlab("z-score r") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_6$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - - - - - -dev.off() - - -#write.csv(Labels,file=paste("../Code/Parameters.csv"),row.names = FALSE) -timestamp() - -#BoneYard*********************************************** -#I'm thinking this parameter needs to be save somewhere "permanent' for the record so outputs can be reproduced. -#take this out of the Arguments. In Matlab I could for future in .mat file. Maybe I could save the SD Args[2] as part of the StudyInfo.txt. -#Corruptable but better than nothing. -#if(is.na(Args[2])){ -# std=3 -#}else { -# std= Arg[2] -#Delta_Background_sdFactor <- 2 #Args[3] -#DelBGFactr <- as.numeric(Delta_Background_sdFactor) -#} - diff --git a/workflow/templates/qhtcp/Exp4/backups/InteractTemplateB4Prompt4SDinput.R b/workflow/templates/qhtcp/Exp4/backups/InteractTemplateB4Prompt4SDinput.R deleted file mode 100644 index 3b11777c..00000000 --- a/workflow/templates/qhtcp/Exp4/backups/InteractTemplateB4Prompt4SDinput.R +++ /dev/null @@ -1,2702 +0,0 @@ -#Based on InteractionTemplate.R which is based on Sean Santose's Interaction_V5 script. -#Adapt SS For Structured Data storage but using command line scripts -###Set up the required libraries, call required plot theme elements and set up the command line arguments -library("ggplot2") -library("plyr") -library("extrafont") -library("gridExtra") -library("gplots") -library("RColorBrewer") -library("stringr") -#library("gdata") -library(plotly) -library(htmlwidgets) - -Args <- commandArgs(TRUE) -input_file <- Args[1] #"!!Results_17_0827_yor1null-rpl12anull misLabeledAsFrom MI 17_0919_yor1-curated.txt" #Args[1] #Arg 1 #"!!ResultsStd_JS 19_1224_HLEG_P53.txt" is the !!results ... .txt - -#Path to Output Directory -W=getwd() #R is F'd up, Can't use, Any legitamate platfold could build out dirs from this -outDir <- "ZScores/" #"Args[2] #paste0(W,"/ZScores/") -subDir <- outDir #Args[2] - -if (file.exists(subDir)){ - outputpath <- subDir -} else { - dir.create(file.path(subDir)) -} - -if (file.exists(paste(subDir,"QC/",sep=""))){ - outputpath_QC <- paste(subDir,"QC/",sep="") -} else { - dir.create(file.path(paste(subDir,"QC/",sep=""))) - outputpath_QC <- paste(subDir,"QC/",sep="") -} -#define the output path (formerly the second argument from Rscript) -outputpath <- outDir - -#Set Args[2] the Background contamination noise filter as a function of standard deviation -#Sean recommends 3 or 5 SD factor. -#Capture Exp_ number,use it to Save Args[2]{std}to Labels field and then Write to Labels to studyInfo.txt for future reference -Labels <- read.csv(file= "../Code/StudyInfo.csv",stringsAsFactors = FALSE) #,sep= ",") -print("Be sure to include Argument 2 the Bacground noise filter standard deviation i.e., 3 or 5 per Sean") -std= as.numeric(Args[2]) -expNumber<- as.numeric(sub("^.*?(\\d+)$", "\\1", getwd())) -Labels[expNumber,3]= as.numeric(std) -Delta_Background_sdFactor <- std -DelBGFactr <- as.numeric(Delta_Background_sdFactor) -#Write Background SD value to studyInfo.txt file -#write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) -write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) -print('ln 50 write StudyInfo.csv ') -#write.table(Labels,file=paste(outputpath,"StudyInfo.txt"),sep = "\t",row.names = FALSE) - -#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -#++++++BEGIN USER DATA SELECTION SECTION+++++++++++++++++++++++++++++++++++++++++++++++++ - - -#read in the data -X <- read.delim(input_file,skip=2,as.is=T,row.names=1,strip.white=TRUE) -X <- X[!(X[[1]]%in%c("","Scan")),] -#X <- X[!(X[[1]]%in%c(61:76)),] #Remove dAmp plates which are Scans 61 thru 76 - -#X <- X[which(X$Specifics == "WT"),] - -X_length <- length(X[1,]) -X_end <- length(X[1,]) - 2 -X <- X[,c(1:46,X_end:X_length)] - - -#use numeric data to perform operations -X$Col <- as.numeric(X$Col) -X$Row <- as.numeric(X$Row) -X$l <- as.numeric(X$l) -X$K <- as.numeric(X$K) -X$r <- as.numeric(X$r) -X$Scan <- as.numeric(X$Scan) -X$AUC <- as.numeric(X$AUC) -X$LstBackgrd <- as.numeric(X$LstBackgrd) -X$X1stBackgrd <- as.numeric(X$X1stBackgrd) - -#set the OrfRep to YDL227C for the ref data -X[X$ORF == "YDL227C",]$OrfRep <- "YDL227C" -#Sean removes the Doxycyclin at 0.0ug.mL so that only the Oligomycin series with Doxycyclin of 0.12ug/mL are used. -#That is the first DM plates are removed from the data set with the following. -X <- X[X$Conc1 != "0ug/mL",] #This occurs only for Exp1 and Exp2 and so doesn't have any effect on Exp3&4 - - -#Mert placed the"bad_spot" text in the ORF col. for particular spots in the RF1 and RF2 plates. -#This code removes those spots from the data set used for the interaction analysis. Dr.Hartman feels that these donot effect Zscores significantly and so "non-currated" files were used. -#try(X <- X[X$ORF != "bad_spot",]) -#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++= - -#get total number of drug concentrations -Total_Conc_Nums <- length(unique(X$Conc)) - -#function to ID numbers in string with characters+numbers (ie to get numeric drug conc) -numextract <- function(string){ - str_extract(string, "\\-*\\d+\\.*\\d*") -} - -#generate a new column with the numeric drug concs -X$Conc_Num <- as.numeric(numextract(X$Conc)) -#Generate new column with the numeric drug concs as factors starting at 0 for the graphing later -X$Conc_Num_Factor <- as.numeric(as.factor(X$Conc_Num)) - 1 - -#Get the max factor for concentration -MAX_CONC <- max(X$Conc_Num_Factor) -#if treating numbers not as factors uncomment next line and comment out previous line -#MAX_CONC <- max(X$Conc_Num) - -#remove wells with problems for making graphs and to not include in summary statistics -X <- X[X$Gene != "BLANK",] -X <- X[X$Gene != "Blank",] -X <- X[X$ORF != "Blank",] -X <- X[X$Gene != "blank",] -#X <- X[X$Gene != "HO",] -Xbu= X -#Inserted to use SGDgenelist to update orfs and replace empty geneName cells with ORF name (adapted from Sean's Merge script). This is to 'fix' the naming for everything that follows (REMc, Heatmaps ... et.al) rather than do it piece meal later -#Sean's Match Script( which was adapted here) was fixed 2022_0608 so as not to write over the RF1&RF2 geneNames which caused a variance with his code results -#in the Z_lm_L,K,r&AUC output values. Values correlated well but were off by a multiplier factor. -SGDgeneList= "../Code/SGD_features.tab" -genes = data.frame(read.delim(file=SGDgeneList,quote="",header=FALSE,colClasses = c(rep("NULL",3), rep("character", 2), rep("NULL", 11)))) -for(i in 1:length(X[,14])){ - ii= as.numeric(i) - line_num = match(X[ii,14],genes[,1],nomatch=1) - OrfRepColNum= as.numeric(match('OrfRep',names(X))) - if(X[ii,OrfRepColNum]!= "YDL227C"){ - X[ii,15] = genes[line_num,2] - } - if((X[ii,15] == "")||(X[ii,15] == "OCT1")){ - X[ii,15] = X[ii,OrfRepColNum] - } -} -Xblankreplace= X -#X= Xbu #for restore testing restore X if geneName 'Match' routine needs changing - -#Remove dAmPs ******************************* -DAmPs_List <- "../Code/22_0602_Remy_DAmPsList.txt" -Damps <- read.delim(DAmPs_List,header=F) - -X <- X[!(X$ORF %in% Damps$V1),] #fix this to Damps[,1] -XafterDampsRM=X #Backup for debugging especially when Rstudio goes crazy out of control -# *********** - - -#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -#++++++END USER DATA SELECTION+++++++++++++++++++++++++++++++++++++++++++++++++ -#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -print("ln137 End of User Section including blank gene writeOver") -#++++Begin Graphics Boiler Plate Section+++++++++++++++++++++++++++++++++++++++ -#theme elements for plots -theme_Publication <- function(base_size=14, base_family="sans") { - library(grid) - library(ggthemes) - (theme_foundation(base_size=base_size, base_family=base_family) - + theme(plot.title = element_text(face = "bold", - size = rel(1.2), hjust = 0.5), - text = element_text(), - panel.background = element_rect(colour = NA), - plot.background = element_rect(colour = NA), - panel.border = element_rect(colour = NA), - axis.title = element_text(face = "bold",size = rel(1)), - axis.title.y = element_text(angle=90,vjust =2), - axis.title.x = element_text(vjust = -0.2), - axis.text = element_text(), - axis.line = element_line(colour="black"), - axis.ticks = element_line(), - panel.grid.major = element_line(colour="#f0f0f0"), - panel.grid.minor = element_blank(), - legend.key = element_rect(colour = NA), - legend.position = "bottom", - legend.direction = "horizontal", - legend.key.size= unit(0.2, "cm"), - legend.spacing = unit(0, "cm"), - legend.title = element_text(face="italic"), - plot.margin=unit(c(10,5,5,5),"mm"), - strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"), - strip.text = element_text(face="bold") - )) - -} - -scale_fill_Publication <- function(...){ - library(scales) - discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - -} - -scale_colour_Publication <- function(...){ - discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - -} - - -theme_Publication_legend_right <- function(base_size=14, base_family="sans") { - (theme_foundation(base_size=base_size, base_family=base_family) - + theme(plot.title = element_text(face = "bold", - size = rel(1.2), hjust = 0.5), - text = element_text(), - panel.background = element_rect(colour = NA), - plot.background = element_rect(colour = NA), - panel.border = element_rect(colour = NA), - axis.title = element_text(face = "bold",size = rel(1)), - axis.title.y = element_text(angle=90,vjust =2), - axis.title.x = element_text(vjust = -0.2), - axis.text = element_text(), - axis.line = element_line(colour="black"), - axis.ticks = element_line(), - panel.grid.major = element_line(colour="#f0f0f0"), - panel.grid.minor = element_blank(), - legend.key = element_rect(colour = NA), - legend.position = "right", - legend.direction = "vertical", - legend.key.size= unit(0.5, "cm"), - legend.spacing = unit(0, "cm"), - legend.title = element_text(face="italic"), - plot.margin=unit(c(10,5,5,5),"mm"), - strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"), - strip.text = element_text(face="bold") - )) - -} - -scale_fill_Publication <- function(...){ - discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - -} - -scale_colour_Publication <- function(...){ - discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - -} - - -#print timestamp for initial time the code starts -timestamp() -#+++++BEGIN QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++ -###Part 2 - Quality control -#print quality control graphs for each dataset before removing data due to contamination -#and before adjusting missing data to max theoretical values - -#plate analysis plot -#plate analysis is a quality check to identify plate effects containing anomalies - -Plate_Analysis_L <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication() - -Plate_Analysis_K <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication() - -Plate_Analysis_r <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_Publication() - -Plate_Analysis_AUC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication() - - - -Plate_Analysis_L_Box <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication() - -Plate_Analysis_K_Box <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication() - -Plate_Analysis_r_Box <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_Publication() - -Plate_Analysis_AUC_Box <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication() - -#quality control - values with a high delta background likely have heavy contamination -#check the frequency of these values -#report the L and K values of these spots -#report the number to be removed based on the Delta_Background_Tolerance -X$Delta_Backgrd <- X$LstBackgrd - X$X1stBackgrd - - -#raw l vs K before QC -Raw_l_vs_K_beforeQC <- ggplot(X,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + - ggtitle("Raw L vs K before QC") + - theme_Publication_legend_right() -pdf(paste(outputpath_QC,"Raw_L_vs_K_beforeQC.pdf",sep=""),width = 12,height = 8) -Raw_l_vs_K_beforeQC -dev.off() -pgg <- ggplotly(Raw_l_vs_K_beforeQC) -#pgg -plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_beforeQC.html",sep="") -saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - -#set delta background tolerance based on 3 sds from the mean delta background -Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(DelBGFactr*sd(X$Delta_Backgrd)) -#Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(3*sd(X$Delta_Backgrd)) -print(paste("Delta_Background_Tolerance is",Delta_Background_Tolerance,sep=" ")) - -Plate_Analysis_Delta_Backgrd <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2,position="jitter") + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_Box <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication() - - - -X_Delta_Backgrd_above_Tolerance <- X[X$Delta_Backgrd >= Delta_Background_Tolerance,] - -X_Delta_Backgrd_above_Tolerance_K_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$K,na.rm = TRUE))/2 -X_Delta_Backgrd_above_Tolerance_L_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$l,na.rm = TRUE))/2 -X_Delta_Backgrd_above_Tolerance_toRemove <- dim(X_Delta_Backgrd_above_Tolerance)[1] - -X_Delta_Backgrd_above_Tolerance_L_vs_K <- ggplot(X_Delta_Backgrd_above_Tolerance,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + - ggtitle(paste("Raw L vs K for strains above delta background threshold of",Delta_Background_Tolerance,"or above")) + - annotate("text",x=X_Delta_Backgrd_above_Tolerance_L_halfmedian,y=X_Delta_Backgrd_above_Tolerance_K_halfmedian, - label = paste("Strains above delta background tolerance = ",X_Delta_Backgrd_above_Tolerance_toRemove)) + - theme_Publication_legend_right() -pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.pdf",sep=""),width = 12,height = 8) -X_Delta_Backgrd_above_Tolerance_L_vs_K -dev.off() -pgg <- ggplotly(X_Delta_Backgrd_above_Tolerance_L_vs_K) -#pgg -plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.html",sep="") -saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - -#frequency plot for all data vs. the delta_background -DeltaBackground_Frequency_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_density() + - ggtitle("Density plot for Delta Background by Conc All Data") + theme_Publication_legend_right() - -#bar plot for all data vs. the delta_background -DeltaBackground_Bar_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_bar() + - ggtitle("Bar plot for Delta Background by Conc All Data") + theme_Publication_legend_right() - -pdf(file = paste(outputpath_QC,"Frequency_Delta_Background.pdf",sep=""),width = 12,height = 8) -print(DeltaBackground_Frequency_Plot) -print(DeltaBackground_Bar_Plot) -dev.off() - - -#Need to identify missing data, and differentiate between this data and removed data so the removed data can get set to NA and the missing data can get set to max theoretical values -#1 for missing data, 0 for non missing data -#Use "NG" for NoGrowth rather than "missing" -X$NG <- 0 -try(X[X$l == 0 & !is.na(X$l),]$NG <- 1) - -#1 for removed data, 0 non removed data -#Use DB to identify number of genes removed due to the DeltaBackground Threshold rather than "Removed" -X$DB <- 0 -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$DB <- 1) - -#replace the CPPs for l, r, AUC and K (must be last!) for removed data -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$l <- NA) -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$r <- NA) -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$AUC <- NA) -try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$K <- NA) - - -Plate_Analysis_L_afterQC <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_afterQC <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_afterQC <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_afterQC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_afterQC <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - - -Plate_Analysis_L_Box_afterQC <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_Box_afterQC <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_Box_afterQC <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_Box_afterQC <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_Box_afterQC <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis.pdf",sep=""),width = 14,height=9) -Plate_Analysis_L -Plate_Analysis_L_afterQC -Plate_Analysis_K -Plate_Analysis_K_afterQC -Plate_Analysis_r -Plate_Analysis_r_afterQC -Plate_Analysis_AUC -Plate_Analysis_AUC_afterQC -Plate_Analysis_Delta_Backgrd -Plate_Analysis_Delta_Backgrd_afterQC -dev.off() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis_Boxplots.pdf",sep=""),width = 18,height=9) -Plate_Analysis_L_Box -Plate_Analysis_L_Box_afterQC -Plate_Analysis_K_Box -Plate_Analysis_K_Box_afterQC -Plate_Analysis_r_Box -Plate_Analysis_r_Box_afterQC -Plate_Analysis_AUC_Box -Plate_Analysis_AUC_Box_afterQC -Plate_Analysis_Delta_Backgrd_Box -Plate_Analysis_Delta_Backgrd_Box_afterQC -dev.off() - -#remove the zero values and print plate analysis -X_noZero <- X[which(X$l > 0),] -Plate_Analysis_L_afterQC_Z <- ggplot(X_noZero,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_afterQC_Z <- ggplot(X_noZero,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_afterQC_Z <- ggplot(X_noZero,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_afterQC_Z <- ggplot(X_noZero,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_afterQC_Z <- ggplot(X_noZero,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - - -Plate_Analysis_L_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() - -Plate_Analysis_K_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() - -Plate_Analysis_r_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() - -Plate_Analysis_AUC_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() - -Plate_Analysis_Delta_Backgrd_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + - ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros.pdf",sep=""),width = 14,height=9) -Plate_Analysis_L_afterQC_Z -Plate_Analysis_K_afterQC_Z -Plate_Analysis_r_afterQC_Z -Plate_Analysis_AUC_afterQC_Z -Plate_Analysis_Delta_Backgrd_afterQC_Z -dev.off() - -#print the plate analysis data before and after QC -pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros_Boxplots.pdf",sep=""),width = 18,height=9) -Plate_Analysis_L_Box_afterQC_Z -Plate_Analysis_K_Box_afterQC_Z -Plate_Analysis_r_Box_afterQC_Z -Plate_Analysis_AUC_Box_afterQC_Z -Plate_Analysis_Delta_Backgrd_Box_afterQC_Z -dev.off() - -#remove dataset with zeros removed -rm(X_noZero) - - -#X_test_missing_and_removed <- X[X$Removed == 1,] - -#calculate summary statistics for all strains, including both background and the deletions -X_stats_ALL <- ddply(X, c("Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_AUC = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - se_AUC = sd_AUC / sqrt(N-1) -) -#print(X_stats_ALL_L) - -write.csv(X_stats_ALL,file=paste(outputpath,"SummaryStats_ALLSTRAINS.csv"),row.names = FALSE) -#+++++END QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -##### Part 3 - Generate summary statistics and calculate the max theoretical L value -##### Calculate the Z score at each drug conc for each deletion strain - - -#get the background strains - can be modified to take another argument but for most screens will just be YDL227C -Background_Strains <- c("YDL227C") - -#first part of loop will go through for each background strain -#most cases there will only be one YDL227C -for(s in Background_Strains){ - X_Background <- X[X$OrfRep == s,] - - #if there's missing data for the background strains set these values to NA so the 0 values aren't included in summary statistics - #we may want to consider in some cases giving the max high value to L depending on the data type - if(table(X_Background$l)[1] == 0){ - X_Background[X_Background$l == 0,]$l <- NA - X_Background[X_Background$K == 0,]$K <- NA - X_Background[X_Background$r == 0,]$r <- NA - X_Background[X_Background$AUC == 0,]$AUC <- NA - } - - X_Background <- X_Background[!is.na(X_Background$l),] - - #get summary stats for L, K, R, AUC - X_stats_BY_L <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean = mean(l,na.rm=TRUE), - median = median(l,na.rm=TRUE), - max = max(l,na.rm=TRUE), - min = min(l,na.rm=TRUE), - sd = sd(l,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - print(X_stats_BY_L) - X1_SD <- max(X_stats_BY_L$sd) - - X_stats_BY_K <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(K)), - mean = mean(K,na.rm=TRUE), - median = median(K,na.rm=TRUE), - max = max(K,na.rm=TRUE), - min = min(K,na.rm=TRUE), - sd = sd(K,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - - X1_SD_K <- max(X_stats_BY_K$sd) - - - X_stats_BY_r <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = length(r), - mean = mean(r,na.rm=TRUE), - median = median(r,na.rm=TRUE), - max = max(r,na.rm=TRUE), - min = min(r,na.rm=TRUE), - sd = sd(r,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - - X1_SD_r <- max(X_stats_BY_r$sd) - - X_stats_BY_AUC <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = length(AUC), - mean = mean(AUC,na.rm=TRUE), - median = median(AUC,na.rm=TRUE), - max = max(AUC,na.rm=TRUE), - min = min(AUC,na.rm=TRUE), - sd = sd(AUC,na.rm=TRUE), - se = sd / sqrt(N-1) - ) - - X1_SD_AUC <- max(X_stats_BY_AUC$sd) - - X_stats_BY <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_L = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - se_AUC = sd_AUC / sqrt(N-1) - ) - - write.csv(X_stats_BY,file=paste(outputpath,"SummaryStats_BackgroundStrains.csv"),row.names=FALSE) - - #calculate the max theoretical L values - #only look for max values when K is within 2SD of the ref strain - for(q in unique(X$Conc_Num_Factor)){ - if(q == 0){ - X_within_2SD_K <- X[X$Conc_Num_Factor == q,] - X_within_2SD_K <- X_within_2SD_K[!is.na(X_within_2SD_K$l),] - X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) ,] - X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - - X_outside_2SD_K <- X[X$Conc_Num_Factor == q,] - X_outside_2SD_K <- X_outside_2SD_K[!is.na(X_outside_2SD_K$l),] - #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - #X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - - } - if(q > 0){ - X_within_2SD_K_temp <- X[X$Conc_Num_Factor == q,] - X_within_2SD_K_temp <- X_within_2SD_K_temp[!is.na(X_within_2SD_K_temp$l),] - X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])),] - X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])),] - X_within_2SD_K <- rbind(X_within_2SD_K,X_within_2SD_K_temp) - - X_outside_2SD_K_temp <- X[X$Conc_Num_Factor == q,] - X_outside_2SD_K_temp <- X_outside_2SD_K_temp[!is.na(X_outside_2SD_K_temp$l),] - #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] - X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - #X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] - X_outside_2SD_K <- rbind(X_outside_2SD_K,X_outside_2SD_K_temp) - } - } - - X_stats_BY_L_within_2SD_K <- ddply(X_within_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean = mean(l), - median = median(l), - max = max(l,na.rm=TRUE), - min = min(l,na.rm=TRUE), - sd = sd(l), - se = sd / sqrt(N-1), - z_max = (max-mean)/sd - ) - print(X_stats_BY_L_within_2SD_K) - X1_SD_within_2SD_K <- max(X_stats_BY_L_within_2SD_K$sd) - write.csv(X_stats_BY_L_within_2SD_K,file=paste(outputpath_QC,"Max_Observed_L_Vals_for_spots_within_2SD_K.csv",sep=""),row.names=FALSE) - - X_stats_BY_L_outside_2SD_K <- ddply(X_outside_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean = mean(l), - median = median(l), - max = max(l,na.rm=TRUE), - min = min(l,na.rm=TRUE), - sd = sd(l), - se = sd / sqrt(N-1) - ) - print(X_stats_BY_L_outside_2SD_K) - X1_SD_outside_2SD_K <- max(X_stats_BY_L_outside_2SD_K$sd) - - #X1_SD_outside_2SD_K <- X[X$l %in% X1_SD_within_2SD_K$l,] - Outside_2SD_K_L_vs_K <- ggplot(X_outside_2SD_K,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + - ggtitle("Raw L vs K for strains falling outside 2SD of the K mean at each conc") + theme_Publication_legend_right() - pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8) - print(Outside_2SD_K_L_vs_K) - dev.off() - pgg <- ggplotly(Outside_2SD_K_L_vs_K) - plotly_path <- paste(getwd(),"/",outputpath_QC,"RawL_vs_K_for_strains_outside_2SD_K.html",sep="") - saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - - Outside_2SD_K_delta_background_vs_K <- ggplot(X_outside_2SD_K,aes(Delta_Backgrd,K,color=as.factor(Conc_Num))) + geom_point(aes(l=l,ORF=ORF,Gene=Gene),shape=3,position="jitter") + - ggtitle("DeltaBackground vs K for strains falling outside 2SD of the K mean at each conc") + theme_Publication_legend_right() - pdf(paste(outputpath_QC,"DeltaBackground_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8) - Outside_2SD_K_delta_background_vs_K - dev.off() - pgg <- ggplotly(Outside_2SD_K_delta_background_vs_K) - #pgg - plotly_path <- paste(getwd(),"/",outputpath_QC,"DeltaBackground_vs_K_for_strains_outside_2SD_K.html",sep="") - saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - - #get the background strain mean values at the no drug conc to calculate shift - Background_L <- X_stats_BY_L$mean[1] - Background_K <- X_stats_BY_K$mean[1] - Background_r <- X_stats_BY_r$mean[1] - Background_AUC <- X_stats_BY_AUC$mean[1] - - #create empty plots for plotting element - p_l <- ggplot() - p_K <- ggplot() - p_r <- ggplot() - p_AUC <- ggplot() - - p_rf_l <- ggplot() - p_rf_K <- ggplot() - p_rf_r <- ggplot() - p_rf_AUC <- ggplot() - - #get only the deletion strains - X2 <- X - X2 <- X2[X2$OrfRep != "YDL227C",] - - #if set to max theoretical value, add a 1 to SM, if not, leave as 0 - #SM = Set to Max - X2$SM <- 0 - #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min - for(i in 1:length(unique(X2$Conc_Num))){ - Concentration <- unique(X2$Conc_Num)[i] - X2_temp <- X2[X2$Conc_Num == Concentration,] - if(Concentration == 0){ - X2_new <- X2_temp - print(paste("Check loop order, conc =",Concentration,sep=" ")) - } - if(Concentration > 0){ - try(X2_temp[X2_temp$l == 0 & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$SM <- 1) - try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - - #X2_temp[X2_temp$K == 0,]$K <- X_stats_ALL_K$max[i] - #X2_temp[X2_temp$r == 0,]$r <- X_stats_ALL_r$max[i] - #X2_temp[X2_temp$AUC == 0,]$AUC <- X_stats_ALL_AUC$max[i] - print(paste("Check loop order, conc =",Concentration,sep=" ")) - - X2_new <- rbind(X2_new,X2_temp) - - } - } - X2 <- X2_new - - - #get only the RF strains - X2_RF <- X - X2_RF <- X2_RF[X2_RF$OrfRep == "YDL227C",] - #if set to max theoretical value, add a 1 to SM, if not, leave as 0 - #SM = Set to Max - X2_RF$SM <- 0 - #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min - for(i in 1:length(unique(X2_RF$Conc_Num))){ - Concentration <- unique(X2_RF$Conc_Num)[i] - X2_RF_temp <- X2_RF[X2_RF$Conc_Num == Concentration,] - if(Concentration == 0){ - X2_RF_new <- X2_RF_temp - print(paste("Check loop order, conc =",Concentration,sep=" ")) - } - if(Concentration > 0){ - try(X2_RF_temp[X2_RF_temp$l == 0 & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$SM <- 1) - try(X2_RF_temp[X2_RF_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) - print(paste("Check loop order, if error, refs have no L values outside theoretical max L, for REFs, conc =",Concentration,sep=" ")) - - X2_RF_new <- rbind(X2_RF_new,X2_RF_temp) - - } - } - X2_RF <- X2_RF_new - - - #######Part 4 Get the RF Z score values - #Change the OrfRep Column to include the RF strain, the Gene name and the Num. so each RF gets its own score - X2_RF$OrfRep <- paste(X2_RF$OrfRep,X2_RF$Gene,X2_RF$Num.,sep="_") - - num_genes_RF <- length(unique(X2_RF$OrfRep)) - print(num_genes_RF) - - - #create the output data.frame containing columns for each RF strain - InteractionScores_RF <- unique(X2_RF["OrfRep"]) - #InteractionScores_RF$Gene <- unique(X2$Gene) - InteractionScores_RF$Gene <- NA - InteractionScores_RF$Raw_Shift_L <- NA - InteractionScores_RF$Z_Shift_L <- NA - InteractionScores_RF$lm_Score_L <- NA - InteractionScores_RF$Z_lm_L <- NA - InteractionScores_RF$R_Squared_L <- NA - InteractionScores_RF$Sum_Z_Score_L <- NA - InteractionScores_RF$Avg_Zscore_L <- NA - InteractionScores_RF$Raw_Shift_K <- NA - InteractionScores_RF$Z_Shift_K <- NA - InteractionScores_RF$lm_Score_K <- NA - InteractionScores_RF$Z_lm_K <- NA - InteractionScores_RF$R_Squared_K <- NA - InteractionScores_RF$Sum_Z_Score_K <- NA - InteractionScores_RF$Avg_Zscore_K <- NA - InteractionScores_RF$Raw_Shift_r <- NA - InteractionScores_RF$Z_Shift_r <- NA - InteractionScores_RF$lm_Score_r <- NA - InteractionScores_RF$Z_lm_r <- NA - InteractionScores_RF$R_Squared_r <- NA - InteractionScores_RF$Sum_Z_Score_r <- NA - InteractionScores_RF$Avg_Zscore_r <- NA - InteractionScores_RF$Raw_Shift_AUC <- NA - InteractionScores_RF$Z_Shift_AUC <- NA - InteractionScores_RF$lm_Score_AUC <- NA - InteractionScores_RF$Z_lm_AUC <- NA - InteractionScores_RF$R_Squared_AUC <- NA - InteractionScores_RF$Sum_Z_Score_AUC <- NA - InteractionScores_RF$Avg_Zscore_AUC <- NA - InteractionScores_RF$NG <- NA - InteractionScores_RF$SM <- NA - - - for(i in 1:num_genes_RF){ - #get each deletion strain ORF - Gene_Sel <- unique(X2_RF$OrfRep)[i] - #extract only the current deletion strain and its data - X_Gene_Sel <- X2_RF[X2_RF$OrfRep == Gene_Sel,] - - X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm = TRUE), - median_L = median(l,na.rm = TRUE), - sd_L = sd(l,na.rm = TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm = TRUE), - median_K = median(K,na.rm = TRUE), - sd_K = sd(K,na.rm = TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm = TRUE), - median_r = median(r,na.rm = TRUE), - sd_r = sd(r,na.rm = TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm = TRUE), - median_AUC = median(AUC,na.rm = TRUE), - sd_AUC = sd(AUC,na.rm = TRUE), - se_AUC = sd_AUC / sqrt(N-1), - NG = sum(NG,na.rm=TRUE), - DB= sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - #Get shift vals - #if L is 0, that means the no growth on no drug - #if L is NA at 0, that means the spot was removed due to contamination - #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift - #otherwise calculate shift at no drug conc - if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){ - X_stats_interaction$Raw_Shift_L <- 0 - X_stats_interaction$Raw_Shift_K <- 0 - X_stats_interaction$Raw_Shift_r <- 0 - X_stats_interaction$Raw_Shift_AUC <- 0 - X_stats_interaction$Z_Shift_L <- 0 - X_stats_interaction$Z_Shift_K <- 0 - X_stats_interaction$Z_Shift_r <- 0 - X_stats_interaction$Z_Shift_AUC <- 0 - }else{ - X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L - X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K - X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r - X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC - X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1] - X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1] - X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1] - X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1] - } - - - #get WT vals - X_stats_interaction$WT_l <- X_stats_BY_L$mean - X_stats_interaction$WT_K <- X_stats_BY_K$mean - X_stats_interaction$WT_r <- X_stats_BY_r$mean - X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean - - #Get WT SD - X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd - X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd - X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd - X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd - - - #only get scores if there's growth at no drug - if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for no growth values in Z score calculation - if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #get linear model - gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction) - - #get interaction score calculated by linear model and R-squared value for the fit - gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] - r_squared_l <- summary(gene_lm_L)$r.squared - gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] - r_squared_K <- summary(gene_lm_K)$r.squared - gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] - r_squared_r <- summary(gene_lm_r)$r.squared - gene_interaction_AUC <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_AUC$coefficients[1] - r_squared_AUC <- summary(gene_lm_AUC)$r.squared - - #Get total of non removed values - Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1 - - #report the scores - InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] - InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] - - InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L - InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc) - - - InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K - InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc) - - InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r - InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1) - - InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC - InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE) - InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1) - } - - if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for missing values in Z score calculatiom - if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #NA values for the next part since there's an NA or 0 at the no drug. - gene_lm_L <- NA - gene_lm_K <- NA - gene_lm_r <- NA - gene_lm_AUC <- NA - - gene_interaction_L <- NA - r_squared_l <- NA - gene_interaction_K <- NA - r_squared_K <- NA - gene_interaction_r <- NA - r_squared_r <- NA - gene_interaction_AUC <- NA - r_squared_AUC <- NA - - X_stats_interaction$Raw_Shift_L <- NA - X_stats_interaction$Raw_Shift_K <- NA - X_stats_interaction$Raw_Shift_r <- NA - X_stats_interaction$Raw_Shift_AUC <- NA - - X_stats_interaction$Z_Shift_L <- NA - X_stats_interaction$Z_Shift_K <- NA - X_stats_interaction$Z_Shift_r <- NA - X_stats_interaction$Z_Shift_AUC <- NA - - InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] - InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] - - InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L - InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - - InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K - InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - - InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r - InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - - InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC - InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA - } - - if(i == 1){ - X_stats_interaction_ALL_RF <- X_stats_interaction - } - if(i > 1){ - X_stats_interaction_ALL_RF <- rbind(X_stats_interaction_ALL_RF,X_stats_interaction) - } - - InteractionScores_RF$NG[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE) - InteractionScores_RF$DB[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE) - InteractionScores_RF$SM[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE) - - #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int) - - - } - print("Pass RF Calculation loop") - - lm_sd_L <- sd(InteractionScores_RF$lm_Score_L,na.rm=TRUE) - lm_sd_K <- sd(InteractionScores_RF$lm_Score_K,na.rm=TRUE) - lm_sd_r <- sd(InteractionScores_RF$lm_Score_r,na.rm=TRUE) - lm_sd_AUC <- sd(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) - - lm_mean_L <- mean(InteractionScores_RF$lm_Score_L,na.rm=TRUE) - lm_mean_K <- mean(InteractionScores_RF$lm_Score_K,na.rm=TRUE) - lm_mean_r <- mean(InteractionScores_RF$lm_Score_r,na.rm=TRUE) - lm_mean_AUC <- mean(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) - - print(paste("Mean RF linear regression score L",lm_mean_L)) - - - InteractionScores_RF$Z_lm_L <- (InteractionScores_RF$lm_Score_L - lm_mean_L)/(lm_sd_L) - InteractionScores_RF$Z_lm_K <- (InteractionScores_RF$lm_Score_K - lm_mean_K)/(lm_sd_K) - InteractionScores_RF$Z_lm_r <- (InteractionScores_RF$lm_Score_r - lm_mean_r)/(lm_sd_r) - InteractionScores_RF$Z_lm_AUC <- (InteractionScores_RF$lm_Score_AUC - lm_mean_AUC)/(lm_sd_AUC) - - - InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$Z_lm_L,decreasing=TRUE),] - InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$NG,decreasing=TRUE),] - write.csv(InteractionScores_RF,paste(outputpath,"RF_ZScores_Interaction.csv",sep=""),row.names=FALSE) - - - for(i in 1:num_genes_RF){ - Gene_Sel <- unique(InteractionScores_RF$OrfRep)[i] - X_ZCalculations <- X_stats_interaction_ALL_RF[X_stats_interaction_ALL_RF$OrfRep == Gene_Sel,] - X_Int_Scores <- InteractionScores_RF[InteractionScores_RF$OrfRep == Gene_Sel,] - - p_rf_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg Zscore =",round(X_Int_Scores$Avg_Zscore_L,2))) + - annotate("text",x=1,y=25,label = paste("lm Zscore =",round(X_Int_Scores$Z_lm_L,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_rf_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) + - annotate("text",x=1,y=25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_L,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_rf_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-0.65,0.65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) + - annotate("text",x=1,y=0.25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_r,2))) + - annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) + - theme_Publication() - - p_rf_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-6500,6500)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) + - annotate("text",x=1,y=2500,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_AUC,2))) + - annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) + - theme_Publication() - - - - if(i == 1){ - X_stats_interaction_ALL_RF_final <- X_ZCalculations - } - if(i > 1){ - X_stats_interaction_ALL_RF_final <- rbind(X_stats_interaction_ALL_RF_final,X_ZCalculations) - } - } - print("Pass RF ggplot loop") - write.csv(X_stats_interaction_ALL_RF_final,paste(outputpath,"RF_ZScore_Calculations.csv",sep=""),row.names = FALSE) - - - ####### Part 5 - Get Zscores for Gene deletion strains - - - - #get total number of genes for the next loop - num_genes <- length(unique(X2$OrfRep)) - print(num_genes) - - #create the output data.frame containing columns for each deletion strain - InteractionScores <- unique(X2["OrfRep"]) - #InteractionScores$Gene <- unique(X2$Gene) - InteractionScores$Gene <- NA - InteractionScores$Raw_Shift_L <- NA - InteractionScores$Z_Shift_L <- NA - InteractionScores$lm_Score_L <- NA - InteractionScores$Z_lm_L <- NA - InteractionScores$R_Squared_L <- NA - InteractionScores$Sum_Z_Score_L <- NA - InteractionScores$Avg_Zscore_L <- NA - InteractionScores$Raw_Shift_K <- NA - InteractionScores$Z_Shift_K <- NA - InteractionScores$lm_Score_K <- NA - InteractionScores$Z_lm_K <- NA - InteractionScores$R_Squared_K <- NA - InteractionScores$Sum_Z_Score_K <- NA - InteractionScores$Avg_Zscore_K <- NA - InteractionScores$Raw_Shift_r <- NA - InteractionScores$Z_Shift_r <- NA - InteractionScores$lm_Score_r <- NA - InteractionScores$Z_lm_r <- NA - InteractionScores$R_Squared_r <- NA - InteractionScores$Sum_Z_Score_r <- NA - InteractionScores$Avg_Zscore_r <- NA - InteractionScores$Raw_Shift_AUC <- NA - InteractionScores$Z_Shift_AUC <- NA - InteractionScores$lm_Score_AUC <- NA - InteractionScores$Z_lm_AUC <- NA - InteractionScores$R_Squared_AUC <- NA - InteractionScores$Sum_Z_Score_AUC <- NA - InteractionScores$Avg_Zscore_AUC <- NA - InteractionScores$NG <- NA - InteractionScores$DB <- NA - InteractionScores$SM <- NA - - for(i in 1:num_genes){ - #get each deletion strain ORF - Gene_Sel <- unique(X2$OrfRep)[i] - #extract only the current deletion strain and its data - X_Gene_Sel <- X2[X2$OrfRep == Gene_Sel,] - - X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise, - N = (length(l)), - mean_L = mean(l,na.rm = TRUE), - median_L = median(l,na.rm = TRUE), - sd_L = sd(l,na.rm = TRUE), - se_L = sd_L / sqrt(N-1), - mean_K = mean(K,na.rm = TRUE), - median_K = median(K,na.rm = TRUE), - sd_K = sd(K,na.rm = TRUE), - se_K = sd_K / sqrt(N-1), - mean_r = mean(r,na.rm = TRUE), - median_r = median(r,na.rm = TRUE), - sd_r = sd(r,na.rm = TRUE), - se_r = sd_r / sqrt(N-1), - mean_AUC = mean(AUC,na.rm = TRUE), - median_AUC = median(AUC,na.rm = TRUE), - sd_AUC = sd(AUC,na.rm = TRUE), - se_AUC = sd_AUC / sqrt(N-1), - NG = sum(NG,na.rm=TRUE), - DB= sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - #Get shift vals - #if L is 0, that means the no growth on no drug - #if L is NA at 0, that means the spot was removed due to contamination - #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift - #otherwise calculate shift at no drug conc - if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){ - X_stats_interaction$Raw_Shift_L <- 0 - X_stats_interaction$Raw_Shift_K <- 0 - X_stats_interaction$Raw_Shift_r <- 0 - X_stats_interaction$Raw_Shift_AUC <- 0 - X_stats_interaction$Z_Shift_L <- 0 - X_stats_interaction$Z_Shift_K <- 0 - X_stats_interaction$Z_Shift_r <- 0 - X_stats_interaction$Z_Shift_AUC <- 0 - }else{ - X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L - X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K - X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r - X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC - X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1] - X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1] - X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1] - X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1] - } - - - #get WT vals - X_stats_interaction$WT_l <- X_stats_BY_L$mean - X_stats_interaction$WT_K <- X_stats_BY_K$mean - X_stats_interaction$WT_r <- X_stats_BY_r$mean - X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean - - #Get WT SD - X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd - X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd - X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd - X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd - - - #only get scores if there's growth at no drug - if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for no growth values in Z score calculation - if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #get linear model - gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction) - gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction) - - #get interaction score calculated by linear model and R-squared value for the fit - gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] - r_squared_l <- summary(gene_lm_L)$r.squared - gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] - r_squared_K <- summary(gene_lm_K)$r.squared - gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] - r_squared_r <- summary(gene_lm_r)$r.squared - gene_interaction_AUC <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_AUC$coefficients[1] - r_squared_AUC <- summary(gene_lm_AUC)$r.squared - - #Get total of non removed values - Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1 - - #report the scores - InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1]) - InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1]) - - InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_L - InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_L-lm_mean_L)/lm_sd_L - InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE) - InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc) - - - InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_K - InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_K-lm_mean_K)/lm_sd_K - InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE) - InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc) - - InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_r - InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_r-lm_mean_r)/lm_sd_r - InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE) - InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1) - - InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_AUC - InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_AUC-lm_mean_AUC)/lm_sd_AUC - InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE) - InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1) - } - - if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){ - - #calculate expected values - X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L - X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K - X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r - X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC - - #calculate normalized delta values - X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L - X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K - X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r - X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC - - #disregard shift for missing values in Z score calculatiom - if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){ - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r - X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC - } - #disregard shift for set to max values in Z score calculation - if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ - X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l - #only calculate the L value without shift since L is the only adjusted value - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r - #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC - - } - - - #calculate Z score at each concentration - X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) - X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) - X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) - X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) - - #NA values for the next part since there's an NA or 0 at the no drug. - gene_lm_L <- NA - gene_lm_K <- NA - gene_lm_r <- NA - - gene_interaction_L <- NA - r_squared_l <- NA - gene_interaction_K <- NA - r_squared_K <- NA - gene_interaction_r <- NA - r_squared_r <- NA - - X_stats_interaction$Raw_Shift_L <- NA - X_stats_interaction$Raw_Shift_K <- NA - X_stats_interaction$Raw_Shift_r <- NA - X_stats_interaction$Raw_Shift_AUC <- NA - - X_stats_interaction$Z_Shift_L <- NA - X_stats_interaction$Z_Shift_K <- NA - X_stats_interaction$Z_Shift_r <- NA - X_stats_interaction$Z_Shift_AUC <- NA - - InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1]) - InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1]) - - InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] - InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] - InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l - InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- NA - - InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] - InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] - InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <-NA - InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K - InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- NA - - InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] - InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] - InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r - InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- NA - - InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] - InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] - InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC - InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA - } - - if(i == 1){ - X_stats_interaction_ALL <- X_stats_interaction - } - if(i > 1){ - X_stats_interaction_ALL <- rbind(X_stats_interaction_ALL,X_stats_interaction) - } - - InteractionScores$NG[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE) - InteractionScores$DB[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE) - InteractionScores$SM[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE) - - #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int) - - - } - print("Pass Int Calculation loop") - InteractionScores <- InteractionScores[order(InteractionScores$Z_lm_L,decreasing=TRUE),] - InteractionScores <- InteractionScores[order(InteractionScores$NG,decreasing=TRUE),] - df_order_by_OrfRep <- unique(InteractionScores$OrfRep) - #X_stats_interaction_ALL <- X_stats_interaction_ALL[order(X_stats_interaction_ALL$NG,decreasing=TRUE),] - write.csv(InteractionScores,paste(outputpath,"ZScores_Interaction.csv",sep=""),row.names=FALSE) - - InteractionScores_deletion_enhancers_L <- InteractionScores[InteractionScores$Avg_Zscore_L >= 2,] - InteractionScores_deletion_enhancers_K <- InteractionScores[InteractionScores$Avg_Zscore_K <= -2,] - InteractionScores_deletion_suppressors_L <- InteractionScores[InteractionScores$Avg_Zscore_L <= -2,] - InteractionScores_deletion_suppressors_K <- InteractionScores[InteractionScores$Avg_Zscore_K >= 2,] - InteractionScores_deletion_enhancers_and_Suppressors_L <- InteractionScores[InteractionScores$Avg_Zscore_L >= 2 | InteractionScores$Avg_Zscore_L <= -2,] - InteractionScores_deletion_enhancers_and_Suppressors_K <- InteractionScores[InteractionScores$Avg_Zscore_K >= 2 | InteractionScores$Avg_Zscore_K <= -2,] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- InteractionScores[InteractionScores$Z_lm_L >= 2 & InteractionScores$Avg_Zscore_L <= -2,] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- InteractionScores[InteractionScores$Z_lm_L <= -2 & InteractionScores$Avg_Zscore_L >= 2,] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- InteractionScores[InteractionScores$Z_lm_K <= -2 & InteractionScores$Avg_Zscore_K >= 2,] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- InteractionScores[InteractionScores$Z_lm_K >= 2 & InteractionScores$Avg_Zscore_K <= -2,] - - InteractionScores_deletion_enhancers_L <- InteractionScores_deletion_enhancers_L[!is.na(InteractionScores_deletion_enhancers_L$OrfRep),] - InteractionScores_deletion_enhancers_K <- InteractionScores_deletion_enhancers_K[!is.na(InteractionScores_deletion_enhancers_K$OrfRep),] - InteractionScores_deletion_suppressors_L <- InteractionScores_deletion_suppressors_L[!is.na(InteractionScores_deletion_suppressors_L$OrfRep),] - InteractionScores_deletion_suppressors_K <- InteractionScores_deletion_suppressors_K[!is.na(InteractionScores_deletion_suppressors_K$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_L <- InteractionScores_deletion_enhancers_and_Suppressors_L[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_K <- InteractionScores_deletion_enhancers_and_Suppressors_K[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K$OrfRep),] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L[!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L$OrfRep),] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L[!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L$OrfRep),] - InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K[!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K$OrfRep),] - InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K[!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K$OrfRep),] - - write.csv(InteractionScores_deletion_enhancers_L,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_K,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_L,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_K,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L,paste(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L,paste(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K,paste(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K,paste(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv",sep=""),row.names=FALSE) - - #get enhancers and suppressors for linear regression - InteractionScores_deletion_enhancers_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2,] - InteractionScores_deletion_enhancers_K_lm <- InteractionScores[InteractionScores$Z_lm_K <= -2,] - InteractionScores_deletion_suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L <= -2,] - InteractionScores_deletion_suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2,] - InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2 | InteractionScores$Z_lm_L <= -2,] - InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2 | InteractionScores$Z_lm_K <= -2,] - - InteractionScores_deletion_enhancers_L_lm <- InteractionScores_deletion_enhancers_L_lm[!is.na(InteractionScores_deletion_enhancers_L_lm$OrfRep),] - InteractionScores_deletion_enhancers_K_lm <- InteractionScores_deletion_enhancers_K_lm[!is.na(InteractionScores_deletion_enhancers_K_lm$OrfRep),] - InteractionScores_deletion_suppressors_L_lm <- InteractionScores_deletion_suppressors_L_lm[!is.na(InteractionScores_deletion_suppressors_L_lm$OrfRep),] - InteractionScores_deletion_suppressors_K_lm <- InteractionScores_deletion_suppressors_K_lm[!is.na(InteractionScores_deletion_suppressors_K_lm$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores_deletion_enhancers_and_Suppressors_L_lm[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L_lm$OrfRep),] - InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores_deletion_enhancers_and_Suppressors_K_lm[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K_lm$OrfRep),] - - write.csv(InteractionScores_deletion_enhancers_L_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_L_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_K_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_K_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_L_lm,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_L_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_suppressors_K_lm,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_K_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv",sep=""),row.names=FALSE) - write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv",sep=""),row.names=FALSE) - - - write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) - print('ln 1570 write StudyInfo.csv after ') - #write.table(Labels,file=paste("../Code/StudyInfo.txt"),sep="\t",row.names = FALSE) - - for(i in 1:num_genes){ - Gene_Sel <- unique(InteractionScores$OrfRep)[i] - X_ZCalculations <- X_stats_interaction_ALL[X_stats_interaction_ALL$OrfRep == Gene_Sel,] - X_Int_Scores <- InteractionScores[InteractionScores$OrfRep == Gene_Sel,] - - p_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_L,2))) + - annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_L,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-65,65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) + - annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_K,2))) + - annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + - theme_Publication() - - p_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-0.65,0.65)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) + - annotate("text",x=1,y=0.25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_r,2))) + - annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) + - theme_Publication() - - p_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + - coord_cartesian(ylim = c(-6500,6500)) + - geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) + - ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + - annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) + - #annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) + - annotate("text",x=1,y=2500,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_AUC,2))) + - annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) + - annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) + - annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + - scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) + - theme_Publication() - - - - if(i == 1){ - X_stats_interaction_ALL_final <- X_ZCalculations - } - if(i > 1){ - X_stats_interaction_ALL_final <- rbind(X_stats_interaction_ALL_final,X_ZCalculations) - } - } - print("Pass Int ggplot loop") - write.csv(X_stats_interaction_ALL_final,paste(outputpath,"ZScore_Calculations.csv",sep=""),row.names = FALSE) - - - - - - - Blank <- ggplot(X2_RF) + geom_blank() - - pdf(paste(outputpath,"InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE) - - X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise, - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_AUC = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - NG = sum(NG,na.rm=TRUE), - DB = sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - L_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,l)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + - annotate("text",x=-0.25,y=10,label="NG") + - annotate("text",x=-0.25,y=5,label="DB") + - annotate("text",x=-0.25,y=0,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=0,label=X_stats_X2_RF$SM) + - theme_Publication() - - K_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,K)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(-20,160)) + - annotate("text",x=-0.25,y=-5,label="NG") + - annotate("text",x=-0.25,y=-12.5,label="DB") + - annotate("text",x=-0.25,y=-20,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-5,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-12.5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-20,label=X_stats_X2_RF$SM) + - theme_Publication() - - R_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,r)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - annotate("text",x=-0.25,y=.9,label="NG") + - annotate("text",x=-0.25,y=.8,label="DB") + - annotate("text",x=-0.25,y=.7,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.9,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.8,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.7,label=X_stats_X2_RF$SM) + - theme_Publication() - - AUC_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,AUC)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + - annotate("text",x=-0.25,y=11000,label="NG") + - annotate("text",x=-0.25,y=10000,label="DB") + - annotate("text",x=-0.25,y=9000,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=11000,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10000,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=9000,label=X_stats_X2_RF$SM) + - theme_Publication() - - - L_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + - theme_Publication() - - K_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + - theme_Publication() - - r_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - theme_Publication() - - AUC_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + - theme_Publication() - - - grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2) - grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2) - - - - #plot the references - #grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4) - #grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2) - - #loop for grid arrange 4x3 - j <- rep(1,((num_genes)/3)-1) - for(n in 1:length(j)){ - j[n+1] <- n*3 + 1 - } - #loop for printing each plot - num <- 0 - for(m in 1:(round((num_genes)/3)-1)){ - num <- j[m] - grid.arrange(p_l[[num]],p_K[[num]],p_r[[num]],p_AUC[[num]],p_l[[num+1]],p_K[[num+1]],p_r[[num+1]],p_AUC[[num+1]],p_l[[num+2]],p_K[[num+2]],p_r[[num+2]],p_AUC[[num+2]],ncol=4,nrow=3) - #grid.arrange(p_l[[364]],p_K[[364]],p_r[[364]],p_l[[365]],p_K[[365]],p_r[[365]],p_l[[366]],p_K[[366]],p_r[[366]],ncol=3,nrow=3) - - - #p1[[num+3]],p_K[[num+3]],p_r[[num+3]],p1[[num+4]],p_K[[num+4]],p_r[[num+4]] - } - if(num_genes != (num+2)){ - total_num = num_genes - (num+2) - if(total_num == 5){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_AUC[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],p_AUC[[num+7]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - if(total_num == 4){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_AUC[[num+6]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - - if(total_num == 3){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],p_l[[num+5]],p_K[[num+5]],p_r[[num+5]],p_AUC[[num+5]],ncol=4,nrow=3) - #grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 2){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],p_l[[num+4]],p_K[[num+4]],p_r[[num+4]],p_AUC[[num+4]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 1){ - grid.arrange(p_l[[num+3]],p_K[[num+3]],p_r[[num+3]],p_AUC[[num+3]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_l[[num+6]],p_K[[num+6]],p_r[[num+6]],p_l[[num+7]],p_K[[num+7]],p_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - } - dev.off() - - - - pdf(paste(outputpath,"RF_InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE) - - X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise, - mean_L = mean(l,na.rm=TRUE), - median_L = median(l,na.rm=TRUE), - max_L = max(l,na.rm=TRUE), - min_L = min(l,na.rm=TRUE), - sd_L = sd(l,na.rm=TRUE), - mean_K = mean(K,na.rm=TRUE), - median_K = median(K,na.rm=TRUE), - max_K = max(K,na.rm=TRUE), - min_K = min(K,na.rm=TRUE), - sd_K = sd(K,na.rm=TRUE), - mean_r = mean(r,na.rm=TRUE), - median_r = median(r,na.rm=TRUE), - max_r = max(r,na.rm=TRUE), - min_r = min(r,na.rm=TRUE), - sd_r = sd(r,na.rm=TRUE), - mean_AUC = mean(AUC,na.rm=TRUE), - median_AUC = median(AUC,na.rm=TRUE), - max_AUC = max(AUC,na.rm=TRUE), - min_AUC = min(AUC,na.rm=TRUE), - sd_AUC = sd(AUC,na.rm=TRUE), - NG = sum(NG,na.rm=TRUE), - DB = sum(DB,na.rm=TRUE), - SM = sum(SM,na.rm=TRUE) - ) - - - L_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,l)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + - annotate("text",x=-0.25,y=10,label="NG") + - annotate("text",x=-0.25,y=5,label="DB") + - annotate("text",x=-0.25,y=0,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=0,label=X_stats_X2_RF$SM) + - theme_Publication() - - K_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,K)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(-20,160)) + - annotate("text",x=-0.25,y=-5,label="NG") + - annotate("text",x=-0.25,y=-12.5,label="DB") + - annotate("text",x=-0.25,y=-20,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-5,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-12.5,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-20,label=X_stats_X2_RF$SM) + - theme_Publication() - - R_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,r)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - annotate("text",x=-0.25,y=.9,label="NG") + - annotate("text",x=-0.25,y=.8,label="DB") + - annotate("text",x=-0.25,y=.7,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.9,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.8,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.7,label=X_stats_X2_RF$SM) + - theme_Publication() - - AUC_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,AUC)) + geom_point(position="jitter",size=1) + - stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), - geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + - scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + - annotate("text",x=-0.25,y=11000,label="NG") + - annotate("text",x=-0.25,y=10000,label="DB") + - annotate("text",x=-0.25,y=9000,label="SM") + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=11000,label=X_stats_X2_RF$NG) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10000,label=X_stats_X2_RF$DB) + - annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=9000,label=X_stats_X2_RF$SM) + - theme_Publication() - - - L_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + - theme_Publication() - - K_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + - theme_Publication() - - r_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + - theme_Publication() - - AUC_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)) + geom_boxplot() + - scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + - ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(12000,0)) + - theme_Publication() - - - grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2) - grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2) - - - - #plot the references - #grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4) - #grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2) - - #loop for grid arrange 4x3 - j <- rep(1,((num_genes_RF)/3)-1) - for(n in 1:length(j)){ - j[n+1] <- n*3 + 1 - } - #loop for printing each plot - num <- 0 - for(m in 1:(round((num_genes_RF)/3)-1)){ - num <- j[m] - grid.arrange(p_rf_l[[num]],p_rf_K[[num]],p_rf_r[[num]],p_rf_AUC[[num]],p_rf_l[[num+1]],p_rf_K[[num+1]],p_rf_r[[num+1]],p_rf_AUC[[num+1]],p_rf_l[[num+2]],p_rf_K[[num+2]],p_rf_r[[num+2]],p_rf_AUC[[num+2]],ncol=4,nrow=3) - #grid.arrange(p_rf_l[[364]],p_rf_K[[364]],p_rf_r[[364]],p_rf_l[[365]],p_rf_K[[365]],p_rf_r[[365]],p_rf_l[[366]],p_rf_K[[366]],p_rf_r[[366]],ncol=3,nrow=3) - - - #p1[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p1[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]] - } - if(num_genes_RF != (num+2)){ - total_num = num_genes_RF - (num+2) - if(total_num == 5){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_AUC[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],p_rf_AUC[[num+7]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - if(total_num == 4){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3) - grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_AUC[[num+6]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - } - - if(total_num == 3){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],p_rf_l[[num+5]],p_rf_K[[num+5]],p_rf_r[[num+5]],p_rf_AUC[[num+5]],ncol=4,nrow=3) - #grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 2){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],p_rf_l[[num+4]],p_rf_K[[num+4]],p_rf_r[[num+4]],p_rf_AUC[[num+4]],Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - - if(total_num == 1){ - grid.arrange(p_rf_l[[num+3]],p_rf_K[[num+3]],p_rf_r[[num+3]],p_rf_AUC[[num+3]],Blank,Blank,Blank,Blank,Blank,Blank,Blank,Blank,ncol=4,nrow=3) - #grid.arrange(p_rf_l[[num+6]],p_rf_K[[num+6]],p_rf_r[[num+6]],p_rf_l[[num+7]],p_rf_K[[num+7]],p_rf_r[[num+7]],Blank,Blank,Blank,ncol=3,nrow=3) - } - } - dev.off() - - #print rank plots for L and K gene interactions - - - InteractionScores_AdjustMissing <- InteractionScores - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_L),]$Avg_Zscore_L <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_K),]$Avg_Zscore_K <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_r),]$Avg_Zscore_r <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Avg_Zscore_AUC),]$Avg_Zscore_AUC <- 0.001 - - InteractionScores_AdjustMissing$L_Rank <- NA - InteractionScores_AdjustMissing$K_Rank <- NA - InteractionScores_AdjustMissing$r_Rank <- NA - InteractionScores_AdjustMissing$AUC_Rank <- NA - - InteractionScores_AdjustMissing$L_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_L) - InteractionScores_AdjustMissing$K_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_K) - InteractionScores_AdjustMissing$r_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_r) - InteractionScores_AdjustMissing$AUC_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_AUC) - - # - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_L),]$Z_lm_L <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_K),]$Z_lm_K <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_r),]$Z_lm_r <- 0.001 - InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_AUC),]$Z_lm_AUC <- 0.001 - - InteractionScores_AdjustMissing$L_Rank_lm <- NA - InteractionScores_AdjustMissing$K_Rank_lm <- NA - InteractionScores_AdjustMissing$r_Rank_lm <- NA - InteractionScores_AdjustMissing$AUC_Rank_lm <- NA - - InteractionScores_AdjustMissing$L_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_L) - InteractionScores_AdjustMissing$K_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_K) - InteractionScores_AdjustMissing$r_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_r) - InteractionScores_AdjustMissing$AUC_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_AUC) - - - - Rank_L_1SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - pdf(paste(outputpath,"RankPlots.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2) - - dev.off() - - - Rank_L_1SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -1,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -2,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -3,])[1])) + - annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - pdf(paste(outputpath,"RankPlots_lm.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2) - - dev.off() - - - - X_NArm <- InteractionScores[!is.na(InteractionScores$Z_lm_L) | !is.na(InteractionScores$Avg_Zscore_L) ,] - - #find overlaps - X_NArm$Overlap <- "No Effect" - try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Enhancer Both") - try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Suppressor Both") - try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= 2,]$Overlap <- "Deletion Enhancer lm only") - try(X_NArm[X_NArm$Z_lm_L <= 2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Enhancer Avg Zscore only") - try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= -2,]$Overlap <- "Deletion Suppressor lm only") - try(X_NArm[X_NArm$Z_lm_L >= -2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Suppressor Avg Zscore only") - try(X_NArm[X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= -2,]$Overlap <- "Deletion Enhancer lm, Deletion Suppressor Avg Z score") - try(X_NArm[X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= 2,]$Overlap <- "Deletion Suppressor lm, Deletion Enhancer Avg Z score") - - #get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 - get_lm_L <- lm(X_NArm$Z_lm_L~X_NArm$Avg_Zscore_L) - L_lm <- summary(get_lm_L) - - get_lm_K <- lm(X_NArm$Z_lm_K~X_NArm$Avg_Zscore_K) - K_lm <- summary(get_lm_K) - - get_lm_r <- lm(X_NArm$Z_lm_r~X_NArm$Avg_Zscore_r) - r_lm <- summary(get_lm_r) - - get_lm_AUC <- lm(X_NArm$Z_lm_AUC~X_NArm$Avg_Zscore_AUC) - AUC_lm <- summary(get_lm_AUC) - - pdf(paste(outputpath,"Avg_Zscore_vs_lm_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) - - print(ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm L") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(Avg_Zscore_K,Z_lm_K)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm K") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(K_lm$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(Avg_Zscore_r,Z_lm_r)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm r") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(r_lm$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(Avg_Zscore_AUC,Z_lm_AUC)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Avg Zscore vs lm AUC") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(AUC_lm$r.squared,2))) + theme_Publication_legend_right()) - - dev.off() - - - lm_v_Zscore_L <- ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L,ORF=OrfRep,Gene=Gene,NG=NG,SM=SM,DB=DB)) + geom_point(aes(color=Overlap),shape=3) + - geom_smooth(method = "lm",color=1) + ggtitle("Avg Zscore vs lm L") + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right() - - pgg <- ggplotly(lm_v_Zscore_L) - #pgg - plotly_path <- paste(getwd(),"/",outputpath,"Avg_Zscore_vs_lm_NA_rm.html",sep="") - saveWidget(pgg, file=plotly_path, selfcontained =TRUE) - - X_NArm$L_Rank <- rank(X_NArm$Avg_Zscore_L) - X_NArm$K_Rank <- rank(X_NArm$Avg_Zscore_K) - X_NArm$r_Rank <- rank(X_NArm$Avg_Zscore_r) - X_NArm$AUC_Rank <- rank(X_NArm$Avg_Zscore_AUC) - - X_NArm$L_Rank_lm <- rank(X_NArm$Z_lm_L) - X_NArm$K_Rank_lm <- rank(X_NArm$Z_lm_K) - X_NArm$r_Rank_lm <- rank(X_NArm$Z_lm_r) - X_NArm$AUC_Rank_lm <- rank(X_NArm$Z_lm_AUC) - - #get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 - get_lm_L2 <- lm(X_NArm$L_Rank_lm~X_NArm$L_Rank) - L_lm2 <- summary(get_lm_L2) - - get_lm_K2 <- lm(X_NArm$K_Rank_lm~X_NArm$K_Rank) - K_lm2 <- summary(get_lm_K2) - - get_lm_r2 <- lm(X_NArm$r_Rank_lm~X_NArm$r_Rank) - r_lm2 <- summary(get_lm_r2) - - get_lm_AUC2 <- lm(X_NArm$AUC_Rank_lm~X_NArm$AUC_Rank) - AUC_lm2 <- summary(get_lm_AUC2) - - num_genes_NArm2 <- (dim(X_NArm)[1])/2 - - pdf(paste(outputpath,"Avg_Zscore_vs_lm_ranked_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) - - print(ggplot(X_NArm,aes(L_Rank,L_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank Avg Zscore vs lm L") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(L_lm2$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(K_Rank,K_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank Avg Zscore vs lm K") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(K_lm2$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(r_Rank,r_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank Avg Zscore vs lm r") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(r_lm2$r.squared,2))) + theme_Publication_legend_right()) - - print(ggplot(X_NArm,aes(AUC_Rank,AUC_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + - ggtitle("Rank of Avg Zscore vs lm AUC") + - annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(AUC_lm2$r.squared,2))) + theme_Publication_legend_right()) - - - - dev.off() - - - - Rank_L_1SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + - ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + - ggtitle("Average Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - pdf(paste(outputpath,"RankPlots_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2) - - dev.off() - - - Rank_L_1SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -1,])[1])) + - theme_Publication() - - Rank_L_2SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -2,])[1])) + - theme_Publication() - - Rank_L_3SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -3,])[1])) + - theme_Publication() - - - Rank_K_1SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -1,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 1,])[1])) + - theme_Publication() - - Rank_K_2SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -2,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 2,])[1])) + - theme_Publication() - - Rank_K_3SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -3,])[1])) + - annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 3,])[1])) + - theme_Publication() - - - Rank_L_1SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_2SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_L_3SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + - ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - - Rank_K_1SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_2SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - Rank_K_3SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + - ggtitle("Interaction Z score vs. Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + - annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + - geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + - theme_Publication() - - pdf(paste(outputpath,"RankPlots_lm_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) - - grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2) - grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2) - - dev.off() - -} - - - -#get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 -get_lm_1 <- lm(X_NArm$Z_lm_K~X_NArm$Z_lm_L) -L_lm_1 <- summary(get_lm_1) - -get_lm_2 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_L) -L_lm_2 <- summary(get_lm_2) - -get_lm_3 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_L) -L_lm_3 <- summary(get_lm_3) - -get_lm_4 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_K) -L_lm_4 <- summary(get_lm_4) - -get_lm_5 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_K) -L_lm_5 <- summary(get_lm_5) - -get_lm_6 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_r) -L_lm_6 <- summary(get_lm_6) - - -pdf(file=paste(outputpath,"Correlation_CPPs.pdf",sep=""),width = 10, height = 7, onefile = TRUE) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_K)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction L vs. Interaction K") + - xlab("z-score L") + ylab("z-score K") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_1$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction L vs. Interaction r") + - xlab("z-score L") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_2$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction L vs. Interaction AUC") + - xlab("z-score L") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_3$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction K vs. Interaction r") + - xlab("z-score K") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_4$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction K vs. Interaction AUC") + - xlab("z-score K") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_5$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_r,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_smooth(method="lm",color="tomato3") + - ggtitle("Interaction r vs. Interaction AUC") + - xlab("z-score r") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_6$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -InteractionScores_RF2 <- InteractionScores_RF[!is.na(InteractionScores_RF$Z_lm_L),] -ggplot(X_NArm,aes(Z_lm_L,Z_lm_K)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_K),color="cyan") + - ggtitle("Interaction L vs. Interaction K") + - xlab("z-score L") + ylab("z-score K") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_1$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_r),color="cyan") + - ggtitle("Interaction L vs. Interaction r") + - xlab("z-score L") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_2$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_L,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_AUC),color="cyan") + - ggtitle("Interaction L vs. Interaction AUC") + - xlab("z-score L") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_3$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_r)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_K,Z_lm_r),color="cyan") + - ggtitle("Interaction K vs. Interaction r") + - xlab("z-score K") + ylab("z-score r") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_4$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_K,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_K,Z_lm_AUC),color="cyan") + - ggtitle("Interaction K vs. Interaction AUC") + - xlab("z-score K") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_5$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - -ggplot(X_NArm,aes(Z_lm_r,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + - geom_point(data=InteractionScores_RF2,aes(Z_lm_r,Z_lm_AUC),color="cyan") + - ggtitle("Interaction r vs. Interaction AUC") + - xlab("z-score r") + ylab("z-score AUC") + - annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_6$r.squared,3))) + - theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) - - - - - -dev.off() - - -#write.csv(Labels,file=paste("../Code/Parameters.csv"),row.names = FALSE) -timestamp() - -#BoneYard*********************************************** -#I'm thinking this parameter needs to be save somewhere "permanent' for the record so outputs can be reproduced. -#take this out of the Arguments. In Matlab I could for future in .mat file. Maybe I could save the SD Args[2] as part of the StudyInfo.txt. -#Corruptable but better than nothing. -#if(is.na(Args[2])){ -# std=3 -#}else { -# std= Arg[2] -#Delta_Background_sdFactor <- 2 #Args[3] -#DelBGFactr <- as.numeric(Delta_Background_sdFactor) -#} - diff --git a/workflow/templates/qhtcp/Exp1/.DS_Store b/workflow/templates/qhtcp/ExpTemplate/.DS_Store similarity index 100% rename from workflow/templates/qhtcp/Exp1/.DS_Store rename to workflow/templates/qhtcp/ExpTemplate/.DS_Store diff --git a/workflow/templates/qhtcp/Exp1/.Rhistory b/workflow/templates/qhtcp/ExpTemplate/.Rhistory similarity index 100% rename from workflow/templates/qhtcp/Exp1/.Rhistory rename to workflow/templates/qhtcp/ExpTemplate/.Rhistory diff --git a/workflow/templates/qhtcp/Exp1/ExpFrontend.m b/workflow/templates/qhtcp/ExpTemplate/ExpFrontend.m similarity index 100% rename from workflow/templates/qhtcp/Exp1/ExpFrontend.m rename to workflow/templates/qhtcp/ExpTemplate/ExpFrontend.m diff --git a/workflow/templates/qhtcp/Exp1/NotesExp1 b/workflow/templates/qhtcp/ExpTemplate/NotesExp1 similarity index 100% rename from workflow/templates/qhtcp/Exp1/NotesExp1 rename to workflow/templates/qhtcp/ExpTemplate/NotesExp1 diff --git a/workflow/templates/qhtcp/Exp1/ZScores/.DS_Store b/workflow/templates/qhtcp/ExpTemplate/ZScores/.DS_Store similarity index 100% rename from workflow/templates/qhtcp/Exp1/ZScores/.DS_Store rename to workflow/templates/qhtcp/ExpTemplate/ZScores/.DS_Store diff --git a/workflow/templates/qhtcp/Exp1/Z_InteractionTemplate.R b/workflow/templates/qhtcp/ExpTemplate/Z_InteractionTemplate.R similarity index 100% rename from workflow/templates/qhtcp/Exp1/Z_InteractionTemplate.R rename to workflow/templates/qhtcp/ExpTemplate/Z_InteractionTemplate.R diff --git a/workflow/templates/qhtcp/Exp1/backups/InteractTemplateB4Prompt4SDinput.R b/workflow/templates/qhtcp/ExpTemplate/backups/InteractTemplateB4Prompt4SDinput.R similarity index 100% rename from workflow/templates/qhtcp/Exp1/backups/InteractTemplateB4Prompt4SDinput.R rename to workflow/templates/qhtcp/ExpTemplate/backups/InteractTemplateB4Prompt4SDinput.R