diff --git a/workflow/.old/templates/qhtcp/Exp2/.DS_Store b/workflow/.old/templates/qhtcp/Exp2/.DS_Store
new file mode 100644
index 00000000..cae6fe4d
Binary files /dev/null and b/workflow/.old/templates/qhtcp/Exp2/.DS_Store differ
diff --git a/workflow/.old/templates/qhtcp/Exp2/ExpFrontend.m b/workflow/.old/templates/qhtcp/Exp2/ExpFrontend.m
new file mode 100644
index 00000000..25a6399b
--- /dev/null
+++ b/workflow/.old/templates/qhtcp/Exp2/ExpFrontend.m
@@ -0,0 +1,214 @@
+%Frontend240417.m
+%Augmented ExpFrontend that provides a quick text archive and a robust,
+%database-amenable archive.
+%Front-end utility that copies the selected source result sheet into the
+%Exp_ folder of StudiesQHTCP/StudyName/Exp1(2,3,4) and records the paths
+%in the study logs two directory levels above the Exp_ folder.
+%
+%Section 1 appends one row to StudiesDataArchive.txt (quick text archive).
+%Section 2 maintains StudiesDataArchive4DB.txt plus the .mat copies of the
+%structure S (one cell-array field per column, one element per log row).
+
+%Exp meta data collection
+W=pwd;
+%Load results file meta data into workspace
+try
+    ExpLabel= strcat('Exp',W(end))
+    questdlg('\fontsize{20} Select the !!Results File','File Selection','OK', struct('Default','OK','Interpreter','tex'));
+    [resFile,resPath]= uigetfile('*.txt')
+    copyfile((fullfile(resPath,resFile)),fullfile(W))
+    resDate= char(regexp(resFile, '(\d\d\_\d\d\d\d)|( \d\d\_\d\d\d\d|\d\d\d\d\d\d)','match'))
+    cd ..
+    Wstudy= pwd
+    studyDate= datetime('now');
+    %The study name is the last component of the study folder path
+    if ispc
+        lastSep=max(strfind(Wstudy,'\'))
+        studyName= Wstudy((lastSep+1):end)
+    else
+        lastSep=max(strfind(Wstudy,'/'))
+        studyName= Wstudy((lastSep+1):end)
+    end
+
+    S.sDate(1)= {studyDate};
+    S.sName(1)= {studyName}
+    S.sPath(1)= {Wstudy}
+    S.ELabel(1)= {ExpLabel}
+    S.EresDate(1)= {resDate}
+    S.EresFile(1)= {resFile}
+    S.EresPath(1)= {resPath}
+
+    cd ..
+
+    fid = fopen('StudiesDataArchive.txt','a');
+    fprintf(fid,'StudyDate\tStudyName\tStudyPath\tExpNum\tExpDate\tExpPath\tResultFile\n');
+    %S.sDate{1} is a datetime object; convert it to text explicitly rather
+    %than relying on fprintf to format a non-char argument with %s.
+    fprintf(fid, '%s\t %s\t %s\t %s\t %s\t %s\t %s \n',char(S.sDate{1}),S.sName{1},S.sPath{1},S.ELabel{1},S.EresDate{1},S.EresPath{1},S.EresFile{1});
+    fclose(fid);
+    fclose('all');
+
+catch
+    cd(W)
+    fclose('all'); %do not leave the archive file open when a step above failed
+    disp('Error: Unable to Execute ExpFrontend.m')
+end
+
+cd(W)
+pwd
+
+
+
+%*************************************************************************************
+%*************************************************************************************
+%#####################################################################################
+%Improved storage amenable to database use.
+%Records the same Study/Exp_ details in StudiesDataArchive4DB.txt and in
+%.mat files so that later calls can detect entries that already exist.
+
+%Everything below needs the results file chosen in section 1. uigetfile
+%returns 0 when the dialog is cancelled, and resFile does not exist at all
+%when section 1 failed before the dialog, so stop here instead of failing
+%later on an undefined or non-text variable.
+if ~exist('resFile','var') || ~ischar(resFile)
+    disp('Error: no results file selected; study log not updated')
+    return
+end
+
+studyDateNow= datetime('now') %current date, used when the study folder name carries no date
+nowNumFNm= strcat((int2str(now)),'.mat') %backup file name built from today's serial date number
+%Date as it may appear in a study folder name: dd_dddd or dddddd, optionally
+%preceded by a space. The original pattern had an unbalanced ')'.
+studyDatePattern= '(\d\d\_\d\d\d\d)|( \d\d\_\d\d\d\d|\d\d\d\d\d\d)|( \d\d\d\d\d\d)';
+%capture the directory two levels above Exp_ for storing log data
+cd ../..
+logPath= pwd
+cd(W)
+%Try to load an existing data set from previous Frontend calls
+try
+    load(fullfile(logPath,'.studyLog.mat')) %load(fullfile('../../','studyLog.mat'))
+catch %If no studyLog.mat found [Initial First Entry]
+    ExpLabel= strcat('Exp',W(end))
+    resDate= char(regexp(resFile, '(\d\d\_\d\d\d\d)|( \d\d\_\d\d\d\d|\d\d\d\d\d\d)','match'))
+    cd .. %move up to current study folder from ../Exp_ folder;
+    %Study meta data collection
+    Wstudy= pwd %Capture the specific current Study directory
+    %Extract the study date from the folder name; 'once' returns the first
+    %match as a char row (empty when there is none) instead of a cell array.
+    studyDate= char(regexp(Wstudy, studyDatePattern,'match','once'))
+    if ~isempty(studyDate)
+        %Remove '_' (and a leading space) so the date is purely numeric
+        studyDate= strrep(strtrim(studyDate),'_','')
+    else %no date in the folder name: use the current date as a 6 char string
+        studyDate= yyyymmdd(studyDateNow);
+        studyDate= (int2str(studyDate))
+        studyDate= studyDate(3:8)
+    end
+    %Capture StudyName
+    if ispc
+        lastSep=max(strfind(Wstudy,'\'))
+        studyName= Wstudy((lastSep+1):end)
+    else
+        lastSep=max(strfind(Wstudy,'/'))
+        studyName= Wstudy((lastSep+1):end)
+
+    end
+    %Put the first data entry into the structure saved to the studyLog .mat files
+    S.sDate(1)= {studyDate};
+    S.sName(1)= {studyName}
+    S.sPath(1)= {Wstudy}
+    S.ELabel(1)= {ExpLabel}
+    S.EresDate(1)= {resDate}
+    S.EresFile(1)= {resFile}
+    S.EresPath(1)= {resPath}
+
+    cd(W)
+
+    logFiletxt= fullfile(logPath,'StudiesDataArchive4DB.txt')
+    %Initialize the StudiesDataArchive4DB.txt file with the first row of meta data
+
+    %Print to a .txt spreadsheet the first set of data in the logFiletxt file
+    fid = fopen(logFiletxt,'w');
+    fprintf(fid,'StudyDate\tStudyName\tStudyPath\tExpNum\tExpDate\tExpPath\tResultFile\n');
+    fprintf(fid, '%s\t %s\t %s\t %s\t %s\t %s\t %s \n',studyDate,studyName,Wstudy,ExpLabel,resDate,resPath,resFile);
+    fclose(fid);
+    %Save the first entry meta data into permanent .mat files for future recall
+    save(fullfile(logPath,'studyLog4DB.mat'), 'S')
+    save(fullfile(logPath,'.studyLog.mat'), 'S')
+    %Dated backup: use the nowNumFNm variable, as the update section does,
+    %rather than the literal file name '.nowNumFNm.mat'.
+    save(fullfile(logPath,strcat('.',nowNumFNm)), 'S')
+
+
+
+end %end of try/catch for the First entry only
+%++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+%++++++++END of FIRST ENTRY startup section++++++++++++++++++++++++++++++++
+%++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+
+
+%Begin routine for all Entries After the Initial data entry****************
+ExpLabel= strcat('Exp',W(end))
+resDate= char(regexp(resFile, '(\d\d\_\d\d\d\d)|( \d\d\_\d\d\d\d|\d\d\d\d\d\d)','match')) %Capture date from !!Results file
+
+cd .. %Move to the current study folder
+
+%Capture study meta data
+Wstudy= pwd % Capture the path to the current study
+
+%Attempt to extract the study date from the folder name and convert it to a
+%searchable numeric date. S.sDate(1) is deliberately NOT assigned here: S
+%now holds the whole log and element 1 belongs to the first logged entry.
+studyDate= char(regexp(Wstudy, studyDatePattern,'match','once'))
+if ~isempty(studyDate)
+    %Remove '_' so that the date is a number for easy DB search. The former
+    %test isequal(str2num(studyDate(3)),'_') compared a number (or []) with
+    %a character and therefore never fired.
+    studyDate= strrep(strtrim(studyDate),'_','')
+else %no date in the folder name: use the current date as a 6 char string
+    studyDate= yyyymmdd(studyDateNow);
+    studyDate= (int2str(studyDate))
+    studyDate= studyDate(3:8)
+end
+
+if ispc
+    lastSep=max(strfind(Wstudy,'\'))
+    studyName= Wstudy((lastSep+1):end)
+else
+    lastSep=max(strfind(Wstudy,'/'))
+    studyName= Wstudy((lastSep+1):end)
+end
+
+%Number of rows currently held in the log structure
+lastRow= length(S.sPath)
+
+%Compare the paths just obtained with those stored in the studyLog.mat
+%file and add a new row if the study or results sheet meta data is new or different
+cd .. %Move to the folder two levels above Exp_ (same folder as logPath)
+logtxt= 'StudiesDataArchive4DB.txt'
+logFiletxt= fullfile(logPath,logtxt)
+try
+    %An entry counts as already logged when study path, results path and
+    %Exp label all match an existing row.
+    matched=0;
+    for n= 1:(lastRow)
+        if strcmp(Wstudy,S.sPath(n)) && strcmp(resPath, S.EresPath(n)) && strcmp(ExpLabel, S.ELabel(n))
+            matched= 1;
+            break
+        end
+    end
+
+
+    %Update the StudiesDataArchive4DB.txt file with a new row of meta data
+    if matched== 0 %no match for this entry: append it to S and rewrite the text archive
+        fid = fopen(logFiletxt,'w'); %'w' truncates; every row of S is written again below
+        S.sDate(lastRow+1)= {studyDate};
+        S.sName(lastRow+1)= {studyName};
+        S.sPath(lastRow+1)= {Wstudy};
+        S.ELabel(lastRow+1)= {ExpLabel};
+        S.EresDate(lastRow+1)= {resDate};
+        S.EresFile(lastRow+1)= {resFile};
+        S.EresPath(lastRow+1)= {resPath};
+        for n= 1:(lastRow +1)
+            %Print to a .txt spreadsheet; the header goes before the first row
+            if n==1
+                fprintf(fid,'StudyDate\tStudyName\tStudyPath\tExpNum\tExpDate\tExpPath\tResultFile\n');
+            end
+            fprintf(fid, '%s\t %s\t %s\t %s\t %s\t %s\t %s \n',S.sDate{n},S.sName{n},S.sPath{n},S.ELabel{n},S.EresDate{n},S.EresPath{n},S.EresFile{n});
+        end
+
+        fclose(fid); %close DBase amenable study log file after data update
+
+        save((fullfile(logPath,'studyLog4DB.mat')), 'S') %database-amenable copy
+        save((fullfile(logPath,'.studyLog.mat')), 'S') %copy loaded by the next Frontend call
+        save((fullfile(logPath,strcat('.',nowNumFNm))), 'S') %dated backup copy
+    end
+    fclose('all');
+
+catch
+    cd(W) %Return to the location of the frontend.m code (/studyName/Exp_)
+    fclose('all');
+    clear S %clear data structure variable 'S.'
+end + +cd(W) %Return to the location of the frontend.m code (/studyName/Exp_) +clear all %clear workspace variables + + diff --git a/workflow/.old/templates/qhtcp/Exp2/ZScores/.DS_Store b/workflow/.old/templates/qhtcp/Exp2/ZScores/.DS_Store new file mode 100644 index 00000000..9f5f58c5 Binary files /dev/null and b/workflow/.old/templates/qhtcp/Exp2/ZScores/.DS_Store differ diff --git a/workflow/.old/templates/qhtcp/Exp2/Z_InteractionTemplate.R b/workflow/.old/templates/qhtcp/Exp2/Z_InteractionTemplate.R new file mode 100644 index 00000000..fe31ca14 --- /dev/null +++ b/workflow/.old/templates/qhtcp/Exp2/Z_InteractionTemplate.R @@ -0,0 +1,2730 @@ +#Based on InteractionTemplate.R which is based on Sean Santose's Interaction_V5 script. +#Adapt SS For Structured Data storage but using command line scripts +###Set up the required libraries, call required plot theme elements and set up the command line arguments +library("ggplot2") +library("plyr") +library("extrafont") +library("gridExtra") +library("gplots") +library("RColorBrewer") +library("stringr") +#library("gdata") +library(plotly) +library(htmlwidgets) + +Args <- commandArgs(TRUE) +input_file <- Args[1] #"!!Results_17_0827_yor1null-rpl12anull misLabeledAsFrom MI 17_0919_yor1-curated.txt" #Args[1] #Arg 1 #"!!ResultsStd_JS 19_1224_HLEG_P53.txt" is the !!results ... 
.txt +#Tool to find a file and copy it to desired location +destDir= getwd() +#srcFile= file.choose() +#file.copy(srcFile, destDir) +#input_file= tail(strsplit(srcFile,"[/]")[[1]],1) + + + +#Path to Output Directory +#W=getwd() #R is F'd up, Can't use, Any legitamate platform could build out dirs from this +outDir <- "ZScores/" #"Args[2] #paste0(W,"/ZScores/") +subDir <- outDir #Args[2] + +if (file.exists(subDir)){ + outputpath <- subDir +} else { + dir.create(file.path(subDir)) +} + +if (file.exists(paste(subDir,"QC/",sep=""))){ + outputpath_QC <- paste(subDir,"QC/",sep="") +} else { + dir.create(file.path(paste(subDir,"QC/",sep=""))) + outputpath_QC <- paste(subDir,"QC/",sep="") +} +#define the output path (formerly the second argument from Rscript) +outputpath <- outDir + +#Set Args[2] the Background contamination noise filter as a function of standard deviation +#std= as.numeric(Args[2]) +#Sean recommends 3 or 5 SD factor. +#Capture Exp_ number,use it to Save Args[2]{std}to Labels field and then Write to Labels to studyInfo.txt for future reference +Labels <- read.csv(file= "../Code/StudyInfo.csv",stringsAsFactors = FALSE) #,sep= ",") +print("Be sure to enter Background noise filter standard deviation i.e., 3 or 5 per Sean") + +#User prompt for std multiplier Value +cat("Enter a Standard Deviation value to noise filter \n") +inpChar<- readLines(file("stdin"), n = 1L) +cat(paste("Standard Deviation Value is", inpChar, "\n")) +inpNum= as.numeric(inpChar) +#set std deviation multiplier default if no user entry +if(!is.na(inpNum)){ + std= inpNum +}else{std= 3} + + +expNumber<- as.numeric(sub("^.*?(\\d+)$", "\\1", getwd())) +Labels[expNumber,3]= as.numeric(std) +Delta_Background_sdFactor <- std +DelBGFactr <- as.numeric(Delta_Background_sdFactor) +#Write Background SD value to studyInfo.txt file +#write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) +write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) +print('ln 50 write 
StudyInfo.csv ') +#write.table(Labels,file=paste(outputpath,"StudyInfo.txt"),sep = "\t",row.names = FALSE) + +#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +#++++++BEGIN USER DATA SELECTION SECTION+++++++++++++++++++++++++++++++++++++++++++++++++ + +#read in the data +X <- read.delim(input_file,skip=2,as.is=T,row.names=1,strip.white=TRUE) +X <- X[!(X[[1]]%in%c("","Scan")),] +#X <- X[!(X[[1]]%in%c(61:76)),] #Remove dAmp plates which are Scans 61 thru 76 + +#X <- X[which(X$Specifics == "WT"),] + +#X_length <- length(X[1,]) +#X_end <- length(X[1,]) - 2 +#X <- X[,c(1:42,X_end:X_length)] + + +#use numeric data to perform operations +X$Col <- as.numeric(X$Col) +X$Row <- as.numeric(X$Row) +X$l <- as.numeric(X$l) +X$K <- as.numeric(X$K) +X$r <- as.numeric(X$r) +X$Scan <- as.numeric(X$Scan) +X$AUC <- as.numeric(X$AUC) +X$LstBackgrd <- as.numeric(X$LstBackgrd) +X$X1stBackgrd <- as.numeric(X$X1stBackgrd) + +#Sometimes the an experimenter may have placed the non-varying drug in the 'Drug' col instead of the 'Modifier1' col +#as was the case in Gemcitabin and Cytarabin experiments. +#The following allows user to rename columns so as to get the appropriate +#data where it needs to be for the script to run properly. +#colnames(X)[7] <- "Modifier1" +#colnames(X)[8] <- "Conc1" +#colnames(X)[10] <- "Drug" +#colnames(X)[11] <- "Conc" + +#set the OrfRep to YDL227C for the ref data +X[X$ORF == "YDL227C",]$OrfRep <- "YDL227C" +#Sean removes the Doxycyclin at 0.0ug.mL so that only the Oligomycin series with Doxycyclin of 0.12ug/mL are used. +#That is the first DM plates are removed from the data set with the following. +#X <- X[X$Conc1 != "0ug/ml",] #This removes data with dox ==0 leaving gene expression on with four different concentrations of Gemcytabin +X <- X[X$Drug != "BMH21",] #This removes data concerning BMH21 for this experiment + +#Mert placed the"bad_spot" text in the ORF col. for particular spots in the RF1 and RF2 plates. 
+#This code removes those spots from the data set used for the interaction analysis. Dr. Hartman feels that these do not affect Zscores significantly and so "non-curated" files were used.
+#try(X <- X[X$ORF != "bad_spot",])
+#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++=
+
+#number of distinct drug concentration labels
+conc_labels <- unique(X$Conc)
+Total_Conc_Nums <- length(conc_labels)
+
+#pull the first (optionally signed, optionally decimal) number out of a
+#mixed text+number string, e.g. to get the numeric drug concentration
+numextract <- function(string){
+  number_pattern <- "\\-*\\d+\\.*\\d*"
+  str_extract(string, number_pattern)
+}
+
+#numeric drug concentration for every row
+X$Conc_Num <- as.numeric(numextract(X$Conc))
+#the same concentrations as consecutive factor codes starting at 0, used for graphing later
+conc_codes <- as.numeric(as.factor(X$Conc_Num))
+X$Conc_Num_Factor <- conc_codes - 1
+
+#largest concentration code
+MAX_CONC <- max(X$Conc_Num_Factor)
+#if treating numbers not as factors uncomment next line and comment out previous line
+#MAX_CONC <- max(X$Conc_Num)
+
+#remove wells with problems for making graphs and to not include in summary statistics;
+#each (column, label) pair is applied in turn, in this order
+blank_filters <- list(c("Gene","BLANK"), c("Gene","Blank"), c("ORF","Blank"), c("Gene","blank"))
+for(blank_filter in blank_filters){
+  X <- X[X[[blank_filter[1]]] != blank_filter[2],]
+}
+#X <- X[X$Gene != "HO",]
+Xbu= X
+#Inserted to use SGDgenelist to update orfs and replace empty geneName cells with ORF name (adapted from Sean's Merge script). This is to 'fix' the naming for everything that follows (REMc, Heatmaps ... et al.) rather than do it piecemeal later
+#Sean's Match Script (which was adapted here) was fixed 2022_0608 so as not to write over the RF1&RF2 geneNames which caused a variance with his code results
+#in the Z_lm_L,K,r&AUC output values. Values correlated well but were off by a multiplier factor.
+SGDgeneList= "../Code/SGD_features.tab"
+#Only columns 4 and 5 of the SGD feature table are read; the matching below
+#treats the first as the ORF key and the second as the gene name.
+genes = data.frame(read.delim(file=SGDgeneList,quote="",header=FALSE,colClasses = c(rep("NULL",3), rep("character", 2), rep("NULL", 11))))
+#Gene-name fix-up, done on whole columns instead of one data-frame write per row.
+#Column 14 of X is matched against the ORF key and column 15 receives the name
+#(positions are hard-coded, as before -- NOTE(review): presumably ORF and Gene; confirm).
+OrfRepColNum= as.numeric(match('OrfRep',names(X)))
+#Unmatched ORFs fall back to row 1 of the table (nomatch=1).
+#NOTE(review): presumably row 1 has an empty gene name, so such rows get the OrfRep below -- confirm.
+line_num = match(X[,14],genes[,1],nomatch=1)
+#Reference-strain rows (OrfRep == YDL227C) keep the name they already have
+needs_lookup <- which(X[,OrfRepColNum] != "YDL227C")
+X[needs_lookup,15] = genes[line_num[needs_lookup],2]
+#Empty names, and the name "OCT1", are replaced by the OrfRep value
+needs_orf_name <- which((X[,15] == "") | (X[,15] == "OCT1"))
+X[needs_orf_name,15] = X[needs_orf_name,OrfRepColNum]
+Xblankreplace= X
+#X= Xbu #for restore testing restore X if geneName 'Match' routine needs changing
+
+#Remove dAmPs *******************************
+#jlh confirmed to leave dAmps in so comment out this section
+#DAmPs_List <- "../Code/22_0602_Remy_DAmPsList.txt"
+#Damps <- read.delim(DAmPs_List,header=F)
+
+#X <- X[!(X$ORF %in% Damps$V1),] #fix this to Damps[,1]
+#XafterDampsRM=X #Backup for debugging
+# ***********
+
+
+#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+#++++++END USER DATA SELECTION+++++++++++++++++++++++++++++++++++++++++++++++++
+#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+print("ln137 End of User Section including blank gene writeOver")
+#++++Begin Graphics Boiler Plate Section+++++++++++++++++++++++++++++++++++++++
+#theme elements for plots
+
+#Shared builder for the two publication themes, which differ only in where
+#the legend sits, its direction and its key size (in cm).
+publication_theme_base <- function(base_size, base_family, legend_position, legend_direction, legend_key_cm) {
+  #attached here so that either public theme works no matter which is called first
+  library(grid)
+  library(ggthemes)
+  (theme_foundation(base_size=base_size, base_family=base_family)
+    + theme(plot.title = element_text(face = "bold",
+                                      size = rel(1.2), hjust = 0.5),
+            text = element_text(),
+            panel.background = element_rect(colour = NA),
+            plot.background = element_rect(colour = NA),
+            panel.border = element_rect(colour = NA),
+            axis.title = element_text(face = "bold",size = rel(1)),
+            axis.title.y = element_text(angle=90,vjust =2),
+            axis.title.x = element_text(vjust = -0.2),
+            axis.text = element_text(),
+            axis.line = element_line(colour="black"),
+            axis.ticks = element_line(),
+            panel.grid.major = element_line(colour="#f0f0f0"),
+            panel.grid.minor = element_blank(),
+            legend.key = element_rect(colour = NA),
+            legend.position = legend_position,
+            legend.direction = legend_direction,
+            legend.key.size= unit(legend_key_cm, "cm"),
+            legend.spacing = unit(0, "cm"),
+            legend.title = element_text(face="italic"),
+            plot.margin=unit(c(10,5,5,5),"mm"),
+            strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
+            strip.text = element_text(face="bold")
+    ))
+}
+
+#Publication theme with a horizontal legend below the plot
+theme_Publication <- function(base_size=14, base_family="sans") {
+  publication_theme_base(base_size, base_family, "bottom", "horizontal", 0.2)
+}
+
+#Publication theme with a vertical legend to the right of the plot
+theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
+  publication_theme_base(base_size, base_family, "right", "vertical", 0.5)
+}
+
+#Colour palette shared by the fill and colour scales
+publication_palette <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")
+
+#Discrete fill scale using the publication palette (was defined twice, identically)
+scale_fill_Publication <- function(...){
+  library(scales)
+  discrete_scale("fill","Publication",manual_pal(values = publication_palette), ...)
+}
+
+#Discrete colour scale using the publication palette (was defined twice, identically)
+scale_colour_Publication <- function(...){
+  library(scales) #manual_pal comes from scales; do not rely on the fill scale having attached it
+  discrete_scale("colour","Publication",manual_pal(values = publication_palette), ...)
+}
+
+#Plate-analysis scatter plot: y_col against Scan, coloured by drug
+#concentration, with a mean +/- 1 sd error bar and a mean point per Scan.
+#  data   data frame with Scan, Conc_Num and the y_col column
+#  y_col  name of the column to plot (character)
+#  label  name shown in the title ("L", "K", ...)
+#  stage  "before" or "after" (quality control), used in the title
+#  jitter TRUE to jitter the raw points
+#Uses fun/fun.min/fun.max, which replace the deprecated fun.y/fun.ymin/fun.ymax
+#arguments of stat_summary.
+plate_scatter_plot <- function(data, y_col, label, stage, jitter = FALSE) {
+  raw_points <- if (jitter) {
+    geom_point(shape=3,size=0.2,position="jitter")
+  } else {
+    geom_point(shape=3,size=0.2)
+  }
+  ggplot(data,aes(Scan,.data[[y_col]],color=as.factor(Conc_Num))) + raw_points +
+    stat_summary(fun = mean, fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x),
+                 geom = "errorbar") +
+    stat_summary(fun = mean, geom = "point",size=0.6) +
+    ylab(y_col) + #keep the plain column name as the axis label
+    ggtitle(paste("Plate analysis by Drug Conc for",label,stage,"quality control")) + theme_Publication()
+}
+
+#Plate-analysis box plot: one box per Scan and drug concentration.
+#Arguments as for plate_scatter_plot.
+plate_box_plot <- function(data, y_col, label, stage) {
+  ggplot(data,aes(as.factor(Scan),.data[[y_col]],color=as.factor(Conc_Num))) + geom_boxplot() +
+    ylab(y_col) +
+    ggtitle(paste("Plate analysis by Drug Conc for",label,stage,"quality control")) + theme_Publication()
+}
+
+
+#print timestamp for initial time the code starts
+timestamp()
+#+++++BEGIN QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++
+###Part 2 - Quality control
+#print quality control graphs for each dataset before removing data due to contamination
+#and before adjusting missing data to max theoretical values
+
+#plate analysis plot
+#plate analysis is a quality check to identify plate effects containing anomalies
+Plate_Analysis_L <- plate_scatter_plot(X, "l", "L", "before")
+Plate_Analysis_K <- plate_scatter_plot(X, "K", "K", "before")
+Plate_Analysis_r <- plate_scatter_plot(X, "r", "r", "before")
+Plate_Analysis_AUC <- plate_scatter_plot(X, "AUC", "AUC", "before")
+
+Plate_Analysis_L_Box <- plate_box_plot(X, "l", "L", "before")
+Plate_Analysis_K_Box <- plate_box_plot(X, "K", "K", "before")
+Plate_Analysis_r_Box <- plate_box_plot(X, "r", "r", "before")
+Plate_Analysis_AUC_Box <- plate_box_plot(X, "AUC", "AUC", "before")
+
+#quality control - values with a high delta background likely have heavy contamination
+#check the frequency of these values
+#report the L and K values of these spots
+#report the number to be removed based on the Delta_Background_Tolerance
+X$Delta_Backgrd <- X$LstBackgrd - X$X1stBackgrd
+
+
+#raw l vs K before QC; ORF, Gene and Delta_Backgrd are passed as extra
+#aesthetics (NOTE(review): presumably so they appear in the plotly tooltip -- confirm)
+Raw_l_vs_K_beforeQC <- ggplot(X,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) +
+  ggtitle("Raw L vs K before QC") +
+  theme_Publication_legend_right()
+pdf(paste(outputpath_QC,"Raw_L_vs_K_beforeQC.pdf",sep=""),width = 12,height = 8)
+print(Raw_l_vs_K_beforeQC)
+dev.off()
+pgg <- ggplotly(Raw_l_vs_K_beforeQC)
+#pgg
+plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_beforeQC.html",sep="")
+saveWidget(pgg, file=plotly_path, selfcontained =TRUE)
+
+
+#set delta background tolerance: mean delta background plus DelBGFactr standard deviations.
+#na.rm=TRUE keeps one missing background reading from turning the tolerance
+#into NA, which would switch the whole filter off.
+Delta_Background_Tolerance <- mean(X$Delta_Backgrd,na.rm=TRUE)+(DelBGFactr*sd(X$Delta_Backgrd,na.rm=TRUE))
+#Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(3*sd(X$Delta_Backgrd))
+print(paste("Delta_Background_Tolerance is",Delta_Background_Tolerance,sep=" "))
+
+Plate_Analysis_Delta_Backgrd <- plate_scatter_plot(X, "Delta_Backgrd", "Delta_Backgrd", "before", jitter = TRUE)
+Plate_Analysis_Delta_Backgrd_Box <- plate_box_plot(X, "Delta_Backgrd", "Delta_Backgrd", "before")
+
+
+
+#rows at or above the tolerance; which() leaves out rows whose delta background is NA
+above_tolerance_rows <- which(X$Delta_Backgrd >= Delta_Background_Tolerance)
+X_Delta_Backgrd_above_Tolerance <- X[above_tolerance_rows,]
+
+#half the median K and L of the flagged rows give the position of the text annotation
+X_Delta_Backgrd_above_Tolerance_K_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$K,na.rm = TRUE))/2
+X_Delta_Backgrd_above_Tolerance_L_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$l,na.rm = TRUE))/2
+X_Delta_Backgrd_above_Tolerance_toRemove <- dim(X_Delta_Backgrd_above_Tolerance)[1]
+
+X_Delta_Backgrd_above_Tolerance_L_vs_K <- ggplot(X_Delta_Backgrd_above_Tolerance,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) +
+  ggtitle(paste("Raw L vs K for strains above delta background threshold of",Delta_Background_Tolerance,"or above")) +
+  annotate("text",x=X_Delta_Backgrd_above_Tolerance_L_halfmedian,y=X_Delta_Backgrd_above_Tolerance_K_halfmedian,
+           label = paste("Strains above delta background tolerance = ",X_Delta_Backgrd_above_Tolerance_toRemove)) +
+  theme_Publication_legend_right()
+pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.pdf",sep=""),width = 12,height = 8)
+print(X_Delta_Backgrd_above_Tolerance_L_vs_K)
+dev.off()
+pgg <- ggplotly(X_Delta_Backgrd_above_Tolerance_L_vs_K)
+#pgg
+plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.html",sep="")
+saveWidget(pgg, file=plotly_path, selfcontained =TRUE)
+
+#frequency plot for all data vs. the delta_background
+DeltaBackground_Frequency_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_density() +
+  ggtitle("Density plot for Delta Background by Conc All Data") + theme_Publication_legend_right()
+
+#bar plot for all data vs. the delta_background
+DeltaBackground_Bar_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_bar() +
+  ggtitle("Bar plot for Delta Background by Conc All Data") + theme_Publication_legend_right()
+
+pdf(file = paste(outputpath_QC,"Frequency_Delta_Background.pdf",sep=""),width = 12,height = 8)
+print(DeltaBackground_Frequency_Plot)
+print(DeltaBackground_Bar_Plot)
+dev.off()
+
+
+#Need to identify missing data, and differentiate between this data and removed data so the removed data can get set to NA and the missing data can get set to max theoretical values
+#1 for missing data, 0 for non missing data
+#Use "NG" for NoGrowth rather than "missing"
+#Index assignment replaces the former try(X[cond,]$col <- value) form, which
+#raised (and silently swallowed) an error whenever no row matched or the
+#condition contained NA, leaving the flags unset.
+X$NG <- 0
+X$NG[which(X$l == 0)] <- 1
+
+#1 for removed data, 0 non removed data
+#Use DB to identify number of genes removed due to the DeltaBackground Threshold rather than "Removed"
+X$DB <- 0
+X$DB[above_tolerance_rows] <- 1
+
+#replace the CPPs for l, r, AUC and K with NA for removed data
+X$l[above_tolerance_rows] <- NA
+X$r[above_tolerance_rows] <- NA
+X$AUC[above_tolerance_rows] <- NA
+X$K[above_tolerance_rows] <- NA
+
+
+Plate_Analysis_L_afterQC <- plate_scatter_plot(X, "l", "L", "after")
+Plate_Analysis_K_afterQC <- plate_scatter_plot(X, "K", "K", "after")
+Plate_Analysis_r_afterQC <- plate_scatter_plot(X, "r", "r", "after")
+Plate_Analysis_AUC_afterQC <- plate_scatter_plot(X, "AUC", "AUC", "after")
+Plate_Analysis_Delta_Backgrd_afterQC <- plate_scatter_plot(X, "Delta_Backgrd", "Delta_Backgrd", "after")
+
+
+Plate_Analysis_L_Box_afterQC <- plate_box_plot(X, "l", "L", "after")
+Plate_Analysis_K_Box_afterQC <- plate_box_plot(X, "K", "K", "after")
+Plate_Analysis_r_Box_afterQC <- plate_box_plot(X, "r", "r", "after")
+Plate_Analysis_AUC_Box_afterQC <- plate_box_plot(X, "AUC", "AUC", "after")
+Plate_Analysis_Delta_Backgrd_Box_afterQC <- plate_box_plot(X, "Delta_Backgrd", "Delta_Backgrd", "after")
+
+#print the plate analysis data before and after QC, one page per plot
+pdf(file=paste(outputpath_QC,"Plate_Analysis.pdf",sep=""),width = 14,height=9)
+print(Plate_Analysis_L)
+print(Plate_Analysis_L_afterQC)
+print(Plate_Analysis_K)
+print(Plate_Analysis_K_afterQC)
+print(Plate_Analysis_r)
+print(Plate_Analysis_r_afterQC)
+print(Plate_Analysis_AUC)
+print(Plate_Analysis_AUC_afterQC)
+print(Plate_Analysis_Delta_Backgrd)
+print(Plate_Analysis_Delta_Backgrd_afterQC)
+dev.off()
+
+#print the plate analysis box plots before and after QC
+pdf(file=paste(outputpath_QC,"Plate_Analysis_Boxplots.pdf",sep=""),width = 18,height=9)
+print(Plate_Analysis_L_Box)
+print(Plate_Analysis_L_Box_afterQC)
+print(Plate_Analysis_K_Box)
+print(Plate_Analysis_K_Box_afterQC)
+print(Plate_Analysis_r_Box)
+print(Plate_Analysis_r_Box_afterQC)
+print(Plate_Analysis_AUC_Box)
+print(Plate_Analysis_AUC_Box_afterQC)
+print(Plate_Analysis_Delta_Backgrd_Box)
+print(Plate_Analysis_Delta_Backgrd_Box_afterQC)
+dev.off()
+
+#remove the zero values and print plate analysis
+X_noZero <- X[which(X$l > 0),]
+Plate_Analysis_L_afterQC_Z <- plate_scatter_plot(X_noZero, "l", "L", "after")
+Plate_Analysis_K_afterQC_Z <- plate_scatter_plot(X_noZero, "K", "K", "after")
+Plate_Analysis_r_afterQC_Z <- plate_scatter_plot(X_noZero, "r", "r", "after")
+Plate_Analysis_AUC_afterQC_Z <- plate_scatter_plot(X_noZero, "AUC", "AUC", "after")
+Plate_Analysis_Delta_Backgrd_afterQC_Z <- plate_scatter_plot(X_noZero, "Delta_Backgrd", "Delta_Backgrd", "after")
+
+
+Plate_Analysis_L_Box_afterQC_Z <- plate_box_plot(X_noZero, "l", "L", "after")
+Plate_Analysis_K_Box_afterQC_Z <- plate_box_plot(X_noZero, "K", "K", "after")
+Plate_Analysis_r_Box_afterQC_Z <- plate_box_plot(X_noZero, "r", "r", "after")
+Plate_Analysis_AUC_Box_afterQC_Z <- plate_box_plot(X_noZero, "AUC", "AUC", "after")
+Plate_Analysis_Delta_Backgrd_Box_afterQC_Z <- plate_box_plot(X_noZero, "Delta_Backgrd", "Delta_Backgrd", "after")
+
+#print the plate analysis data after QC with zero-l rows removed
+pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros.pdf",sep=""),width = 14,height=9)
+print(Plate_Analysis_L_afterQC_Z)
+print(Plate_Analysis_K_afterQC_Z)
+print(Plate_Analysis_r_afterQC_Z)
+print(Plate_Analysis_AUC_afterQC_Z)
+print(Plate_Analysis_Delta_Backgrd_afterQC_Z)
+dev.off()
+
+#print the plate analysis box plots after QC with zero-l rows removed
+pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros_Boxplots.pdf",sep=""),width = 18,height=9)
+Plate_Analysis_L_Box_afterQC_Z
+Plate_Analysis_K_Box_afterQC_Z
+Plate_Analysis_r_Box_afterQC_Z
+Plate_Analysis_AUC_Box_afterQC_Z
+Plate_Analysis_Delta_Backgrd_Box_afterQC_Z +dev.off() + +#remove dataset with zeros removed +rm(X_noZero) + + +#X_test_missing_and_removed <- X[X$Removed == 1,] + +#calculate summary statistics for all strains, including both background and the deletions +X_stats_ALL <- ddply(X, c("Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean_L = mean(l,na.rm=TRUE), + median_L = median(l,na.rm=TRUE), + max_L = max(l,na.rm=TRUE), + min_L = min(l,na.rm=TRUE), + sd_L = sd(l,na.rm=TRUE), + se_L = sd_L / sqrt(N-1), + mean_K = mean(K,na.rm=TRUE), + median_K = median(K,na.rm=TRUE), + max_K = max(K,na.rm=TRUE), + min_K = min(K,na.rm=TRUE), + sd_K = sd(K,na.rm=TRUE), + se_K = sd_K / sqrt(N-1), + mean_r = mean(r,na.rm=TRUE), + median_r = median(r,na.rm=TRUE), + max_r = max(r,na.rm=TRUE), + min_r = min(r,na.rm=TRUE), + sd_r = sd(r,na.rm=TRUE), + se_r = sd_r / sqrt(N-1), + mean_AUC = mean(AUC,na.rm=TRUE), + median_AUC = median(AUC,na.rm=TRUE), + max_AUC = max(AUC,na.rm=TRUE), + min_AUC = min(AUC,na.rm=TRUE), + sd_AUC = sd(AUC,na.rm=TRUE), + se_AUC = sd_AUC / sqrt(N-1) +) +#print(X_stats_ALL_L) + +write.csv(X_stats_ALL,file=paste(outputpath,"SummaryStats_ALLSTRAINS.csv"),row.names = FALSE) +#+++++END QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +##### Part 3 - Generate summary statistics and calculate the max theoretical L value +##### Calculate the Z score at each drug conc for each deletion strain + + +#get the background strains - can be modified to take another argument but for most screens will just be YDL227C +Background_Strains <- c("YDL227C") + +#first part of loop will go through for each background strain +#most cases there will only be one YDL227C +for(s in Background_Strains){ + X_Background <- X[X$OrfRep == s,] + + #if there's missing data for the background strains set these values to NA so the 0 values aren't included in summary statistics + #we may want to consider in some cases giving the max high value to L depending on the data 
type + if(table(X_Background$l)[1] == 0){ + X_Background[X_Background$l == 0,]$l <- NA + X_Background[X_Background$K == 0,]$K <- NA + X_Background[X_Background$r == 0,]$r <- NA + X_Background[X_Background$AUC == 0,]$AUC <- NA + } + + X_Background <- X_Background[!is.na(X_Background$l),] + + #get summary stats for L, K, R, AUC + X_stats_BY_L <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean = mean(l,na.rm=TRUE), + median = median(l,na.rm=TRUE), + max = max(l,na.rm=TRUE), + min = min(l,na.rm=TRUE), + sd = sd(l,na.rm=TRUE), + se = sd / sqrt(N-1) + ) + print(X_stats_BY_L) + X1_SD <- max(X_stats_BY_L$sd) + + X_stats_BY_K <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, + N = (length(K)), + mean = mean(K,na.rm=TRUE), + median = median(K,na.rm=TRUE), + max = max(K,na.rm=TRUE), + min = min(K,na.rm=TRUE), + sd = sd(K,na.rm=TRUE), + se = sd / sqrt(N-1) + ) + + X1_SD_K <- max(X_stats_BY_K$sd) + + + X_stats_BY_r <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, + N = length(r), + mean = mean(r,na.rm=TRUE), + median = median(r,na.rm=TRUE), + max = max(r,na.rm=TRUE), + min = min(r,na.rm=TRUE), + sd = sd(r,na.rm=TRUE), + se = sd / sqrt(N-1) + ) + + X1_SD_r <- max(X_stats_BY_r$sd) + + X_stats_BY_AUC <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, + N = length(AUC), + mean = mean(AUC,na.rm=TRUE), + median = median(AUC,na.rm=TRUE), + max = max(AUC,na.rm=TRUE), + min = min(AUC,na.rm=TRUE), + sd = sd(AUC,na.rm=TRUE), + se = sd / sqrt(N-1) + ) + + X1_SD_AUC <- max(X_stats_BY_AUC$sd) + + X_stats_BY <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean_L = mean(l,na.rm=TRUE), + median_L = median(l,na.rm=TRUE), + max_L = max(l,na.rm=TRUE), + min_L = min(l,na.rm=TRUE), + sd_L = sd(l,na.rm=TRUE), + se_L = sd_L / sqrt(N-1), + mean_K = mean(K,na.rm=TRUE), + median_K = median(K,na.rm=TRUE), + max_K = 
max(K,na.rm=TRUE), + min_K = min(K,na.rm=TRUE), + sd_K = sd(K,na.rm=TRUE), + se_K = sd_K / sqrt(N-1), + mean_r = mean(r,na.rm=TRUE), + median_r = median(r,na.rm=TRUE), + max_r = max(r,na.rm=TRUE), + min_r = min(r,na.rm=TRUE), + sd_r = sd(r,na.rm=TRUE), + se_r = sd_r / sqrt(N-1), + mean_AUC = mean(AUC,na.rm=TRUE), + median_AUC = median(AUC,na.rm=TRUE), + max_AUC = max(AUC,na.rm=TRUE), + #minimum AUC is stored as min_AUC; it was previously assigned to min_L, a name already used earlier in this call for min(l) + min_AUC = min(AUC,na.rm=TRUE), + sd_AUC = sd(AUC,na.rm=TRUE), + se_AUC = sd_AUC / sqrt(N-1) + ) + + #NOTE(review): this paste() omits sep="" unlike most other output paths in this script, so a space is inserted after outputpath - confirm intended + write.csv(X_stats_BY,file=paste(outputpath,"SummaryStats_BackgroundStrains.csv"),row.names=FALSE) + + #calculate the max theoretical L values + #only look for max values when K is within 2SD of the ref strain + for(q in unique(X$Conc_Num_Factor)){ + if(q == 0){ + X_within_2SD_K <- X[X$Conc_Num_Factor == q,] + X_within_2SD_K <- X_within_2SD_K[!is.na(X_within_2SD_K$l),] + X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] + X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) ,] + X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] + + X_outside_2SD_K <- X[X$Conc_Num_Factor == q,] + X_outside_2SD_K <- X_outside_2SD_K[!is.na(X_outside_2SD_K$l),] + #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] + X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] + #X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] + + } + if(q > 0){ + X_within_2SD_K_temp <- X[X$Conc_Num_Factor == q,] + X_within_2SD_K_temp <- X_within_2SD_K_temp[!is.na(X_within_2SD_K_temp$l),] + X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] + X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])),] +
X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])),] + X_within_2SD_K <- rbind(X_within_2SD_K,X_within_2SD_K_temp) + + X_outside_2SD_K_temp <- X[X$Conc_Num_Factor == q,] + X_outside_2SD_K_temp <- X_outside_2SD_K_temp[!is.na(X_outside_2SD_K_temp$l),] + #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] + X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] + #X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] + X_outside_2SD_K <- rbind(X_outside_2SD_K,X_outside_2SD_K_temp) + } + } + + X_stats_BY_L_within_2SD_K <- ddply(X_within_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean = mean(l), + median = median(l), + max = max(l,na.rm=TRUE), + min = min(l,na.rm=TRUE), + sd = sd(l), + se = sd / sqrt(N-1), + z_max = (max-mean)/sd + ) + print(X_stats_BY_L_within_2SD_K) + X1_SD_within_2SD_K <- max(X_stats_BY_L_within_2SD_K$sd) + write.csv(X_stats_BY_L_within_2SD_K,file=paste(outputpath_QC,"Max_Observed_L_Vals_for_spots_within_2SD_K.csv",sep=""),row.names=FALSE) + + X_stats_BY_L_outside_2SD_K <- ddply(X_outside_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean = mean(l), + median = median(l), + max = max(l,na.rm=TRUE), + min = min(l,na.rm=TRUE), + sd = sd(l), + se = sd / sqrt(N-1) + ) + print(X_stats_BY_L_outside_2SD_K) + X1_SD_outside_2SD_K <- max(X_stats_BY_L_outside_2SD_K$sd) + + #X1_SD_outside_2SD_K <- X[X$l %in% X1_SD_within_2SD_K$l,] + Outside_2SD_K_L_vs_K <- ggplot(X_outside_2SD_K,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + + ggtitle("Raw L vs K for strains falling outside 2SD of the K mean at each conc") + 
theme_Publication_legend_right() + pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8) + print(Outside_2SD_K_L_vs_K) + dev.off() + pgg <- ggplotly(Outside_2SD_K_L_vs_K) + plotly_path <- paste(getwd(),"/",outputpath_QC,"RawL_vs_K_for_strains_outside_2SD_K.html",sep="") + saveWidget(pgg, file=plotly_path, selfcontained =TRUE) + + + Outside_2SD_K_delta_background_vs_K <- ggplot(X_outside_2SD_K,aes(Delta_Backgrd,K,color=as.factor(Conc_Num))) + geom_point(aes(l=l,ORF=ORF,Gene=Gene),shape=3,position="jitter") + + ggtitle("DeltaBackground vs K for strains falling outside 2SD of the K mean at each conc") + theme_Publication_legend_right() + pdf(paste(outputpath_QC,"DeltaBackground_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8) + #ggplot objects are not auto-printed inside a for loop, so print() is required for the page to be drawn into the pdf + print(Outside_2SD_K_delta_background_vs_K) + dev.off() + pgg <- ggplotly(Outside_2SD_K_delta_background_vs_K) + #pgg + plotly_path <- paste(getwd(),"/",outputpath_QC,"DeltaBackground_vs_K_for_strains_outside_2SD_K.html",sep="") + saveWidget(pgg, file=plotly_path, selfcontained =TRUE) + + + #get the background strain mean values at the no drug conc to calculate shift + Background_L <- X_stats_BY_L$mean[1] + Background_K <- X_stats_BY_K$mean[1] + Background_r <- X_stats_BY_r$mean[1] + Background_AUC <- X_stats_BY_AUC$mean[1] + + #create empty plots for plotting element + p_l <- ggplot() + p_K <- ggplot() + p_r <- ggplot() + p_AUC <- ggplot() + + p_rf_l <- ggplot() + p_rf_K <- ggplot() + p_rf_r <- ggplot() + p_rf_AUC <- ggplot() + + #get only the deletion strains + X2 <- X + X2 <- X2[X2$OrfRep != "YDL227C",] + + #if set to max theoretical value, add a 1 to SM, if not, leave as 0 + #SM = Set to Max + X2$SM <- 0 + #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min + for(i in 1:length(unique(X2$Conc_Num))){ + Concentration <- unique(X2$Conc_Num)[i] + X2_temp <- X2[X2$Conc_Num == Concentration,] + if(Concentration ==
0){ + X2_new <- X2_temp + print(paste("Check loop order, conc =",Concentration,sep=" ")) + } + if(Concentration > 0){ + try(X2_temp[X2_temp$l == 0 & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) + try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$SM <- 1) + try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) + + #X2_temp[X2_temp$K == 0,]$K <- X_stats_ALL_K$max[i] + #X2_temp[X2_temp$r == 0,]$r <- X_stats_ALL_r$max[i] + #X2_temp[X2_temp$AUC == 0,]$AUC <- X_stats_ALL_AUC$max[i] + print(paste("Check loop order, conc =",Concentration,sep=" ")) + + X2_new <- rbind(X2_new,X2_temp) + + } + } + X2 <- X2_new + + + #get only the RF strains + X2_RF <- X + X2_RF <- X2_RF[X2_RF$OrfRep == "YDL227C",] + #if set to max theoretical value, add a 1 to SM, if not, leave as 0 + #SM = Set to Max + X2_RF$SM <- 0 + #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min + for(i in 1:length(unique(X2_RF$Conc_Num))){ + Concentration <- unique(X2_RF$Conc_Num)[i] + X2_RF_temp <- X2_RF[X2_RF$Conc_Num == Concentration,] + if(Concentration == 0){ + X2_RF_new <- X2_RF_temp + print(paste("Check loop order, conc =",Concentration,sep=" ")) + } + if(Concentration > 0){ + try(X2_RF_temp[X2_RF_temp$l == 0 & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) + #flag the reference spots at or above the max observed L; this line previously indexed X2_temp (the deletion-strain frame), so SM was never set for the references + try(X2_RF_temp[X2_RF_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_RF_temp$l),]$SM <- 1) + try(X2_RF_temp[X2_RF_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) + print(paste("Check loop order, if error, refs have no L values outside theoretical max L, for REFs, conc =",Concentration,sep=" ")) + + X2_RF_new <- rbind(X2_RF_new,X2_RF_temp) + + } + } + X2_RF <- X2_RF_new + + + #######Part 4 Get the RF Z score values + #Change the OrfRep Column to include the RF strain, the Gene name and the Num.
so each RF gets its own score + X2_RF$OrfRep <- paste(X2_RF$OrfRep,X2_RF$Gene,X2_RF$Num.,sep="_") + + num_genes_RF <- length(unique(X2_RF$OrfRep)) + print(num_genes_RF) + + + #create the output data.frame containing columns for each RF strain + InteractionScores_RF <- unique(X2_RF["OrfRep"]) + #InteractionScores_RF$Gene <- unique(X2$Gene) + InteractionScores_RF$Gene <- NA + InteractionScores_RF$Raw_Shift_L <- NA + InteractionScores_RF$Z_Shift_L <- NA + InteractionScores_RF$lm_Score_L <- NA + InteractionScores_RF$Z_lm_L <- NA + InteractionScores_RF$R_Squared_L <- NA + InteractionScores_RF$Sum_Z_Score_L <- NA + InteractionScores_RF$Avg_Zscore_L <- NA + InteractionScores_RF$Raw_Shift_K <- NA + InteractionScores_RF$Z_Shift_K <- NA + InteractionScores_RF$lm_Score_K <- NA + InteractionScores_RF$Z_lm_K <- NA + InteractionScores_RF$R_Squared_K <- NA + InteractionScores_RF$Sum_Z_Score_K <- NA + InteractionScores_RF$Avg_Zscore_K <- NA + InteractionScores_RF$Raw_Shift_r <- NA + InteractionScores_RF$Z_Shift_r <- NA + InteractionScores_RF$lm_Score_r <- NA + InteractionScores_RF$Z_lm_r <- NA + InteractionScores_RF$R_Squared_r <- NA + InteractionScores_RF$Sum_Z_Score_r <- NA + InteractionScores_RF$Avg_Zscore_r <- NA + InteractionScores_RF$Raw_Shift_AUC <- NA + InteractionScores_RF$Z_Shift_AUC <- NA + InteractionScores_RF$lm_Score_AUC <- NA + InteractionScores_RF$Z_lm_AUC <- NA + InteractionScores_RF$R_Squared_AUC <- NA + InteractionScores_RF$Sum_Z_Score_AUC <- NA + InteractionScores_RF$Avg_Zscore_AUC <- NA + InteractionScores_RF$NG <- NA + InteractionScores_RF$SM <- NA + + + for(i in 1:num_genes_RF){ + #get each deletion strain ORF + Gene_Sel <- unique(X2_RF$OrfRep)[i] + #extract only the current deletion strain and its data + X_Gene_Sel <- X2_RF[X2_RF$OrfRep == Gene_Sel,] + + X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean_L = mean(l,na.rm = TRUE), + median_L = median(l,na.rm = TRUE), + sd_L = 
sd(l,na.rm = TRUE), + se_L = sd_L / sqrt(N-1), + mean_K = mean(K,na.rm = TRUE), + median_K = median(K,na.rm = TRUE), + sd_K = sd(K,na.rm = TRUE), + se_K = sd_K / sqrt(N-1), + mean_r = mean(r,na.rm = TRUE), + median_r = median(r,na.rm = TRUE), + sd_r = sd(r,na.rm = TRUE), + se_r = sd_r / sqrt(N-1), + mean_AUC = mean(AUC,na.rm = TRUE), + median_AUC = median(AUC,na.rm = TRUE), + sd_AUC = sd(AUC,na.rm = TRUE), + se_AUC = sd_AUC / sqrt(N-1), + NG = sum(NG,na.rm=TRUE), + DB= sum(DB,na.rm=TRUE), + SM = sum(SM,na.rm=TRUE) + ) + + + #Get shift vals + #if L is 0, that means the no growth on no drug + #if L is NA at 0, that means the spot was removed due to contamination + #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift + #otherwise calculate shift at no drug conc + if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){ + X_stats_interaction$Raw_Shift_L <- 0 + X_stats_interaction$Raw_Shift_K <- 0 + X_stats_interaction$Raw_Shift_r <- 0 + X_stats_interaction$Raw_Shift_AUC <- 0 + X_stats_interaction$Z_Shift_L <- 0 + X_stats_interaction$Z_Shift_K <- 0 + X_stats_interaction$Z_Shift_r <- 0 + X_stats_interaction$Z_Shift_AUC <- 0 + }else{ + X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L + X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K + X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r + X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC + X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1] + X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1] + X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1] + X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1] + } + + + #get WT vals + X_stats_interaction$WT_l <- X_stats_BY_L$mean + X_stats_interaction$WT_K 
<- X_stats_BY_K$mean + X_stats_interaction$WT_r <- X_stats_BY_r$mean + X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean + + #Get WT SD + X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd + X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd + X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd + X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd + + + #only get scores if there's growth at no drug + if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){ + + #calculate expected values + X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L + X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K + X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r + X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC + + #calculate normalized delta values + X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L + X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K + X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r + X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC + + #disregard shift for no growth values in Z score calculation + if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- 
X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC + + } + #disregard shift for set to max values in Z score calculation + if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l + #only calculate the L value without shift since L is the only adjusted value + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC + + } + + + + #calculate Z score at each concentration + X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) + X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) + X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) + X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) + + #get linear model + gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction) + gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction) + gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction) + gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction) + + #get interaction score calculated by linear model and R-squared value for the fit + gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] 
+ r_squared_l <- summary(gene_lm_L)$r.squared + gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] + r_squared_K <- summary(gene_lm_K)$r.squared + gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] + r_squared_r <- summary(gene_lm_r)$r.squared + #AUC score uses the slope and intercept of the AUC fit (the slope was previously taken from gene_lm_r) + gene_interaction_AUC <- MAX_CONC*(gene_lm_AUC$coefficients[2]) + gene_lm_AUC$coefficients[1] + r_squared_AUC <- summary(gene_lm_AUC)$r.squared + + #Get total of non removed values + Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1 + #NOTE(review): the Avg_Zscore_r and Avg_Zscore_AUC assignments in this branch divide by (Total_Conc_Nums-1) instead of Num_non_Removed_Conc - confirm whether that is intended + + #report the scores + InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] + InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] + + InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] + InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] + InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L + InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l + InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE) + InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc) + + + InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] + InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] + InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K + InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K + InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <-
sum(X_stats_interaction$Zscore_K,na.rm = TRUE) + InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc) + + InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] + InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] + InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r + InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r + InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE) + InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1) + + InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] + InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] + InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC + InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC + InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE) + InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1) + } + + if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){ + + #calculate expected values + X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L + X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K + X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r + 
X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC + + #calculate normalized delta values + X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L + X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K + X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r + X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC + + #disregard shift for missing values in Z score calculatiom + if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){ + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC + } + #disregard shift for set to max values in Z score calculation + if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l + #only calculate the L value without shift since L is the only adjusted value + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - 
X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC + + } + + + #calculate Z score at each concentration + X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) + X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) + X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) + X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) + + #NA values for the next part since there's an NA or 0 at the no drug. + gene_lm_L <- NA + gene_lm_K <- NA + gene_lm_r <- NA + gene_lm_AUC <- NA + + gene_interaction_L <- NA + r_squared_l <- NA + gene_interaction_K <- NA + r_squared_K <- NA + gene_interaction_r <- NA + r_squared_r <- NA + gene_interaction_AUC <- NA + r_squared_AUC <- NA + + X_stats_interaction$Raw_Shift_L <- NA + X_stats_interaction$Raw_Shift_K <- NA + X_stats_interaction$Raw_Shift_r <- NA + X_stats_interaction$Raw_Shift_AUC <- NA + + X_stats_interaction$Z_Shift_L <- NA + X_stats_interaction$Z_Shift_K <- NA + X_stats_interaction$Z_Shift_r <- NA + X_stats_interaction$Z_Shift_AUC <- NA + + InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] + InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] + + InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] + InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] + InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L + InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l + 
InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + + InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] + InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] + InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K + InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K + InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + + InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] + InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] + InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r + InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r + InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + + InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] + InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] + InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC + InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC + InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + } + + if(i == 1){ + X_stats_interaction_ALL_RF <- X_stats_interaction 
+ } + if(i > 1){ + X_stats_interaction_ALL_RF <- rbind(X_stats_interaction_ALL_RF,X_stats_interaction) + } + + InteractionScores_RF$NG[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE) + InteractionScores_RF$DB[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE) + InteractionScores_RF$SM[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE) + + #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int) + + + } + print("Pass RF Calculation loop") + + lm_sd_L <- sd(InteractionScores_RF$lm_Score_L,na.rm=TRUE) + lm_sd_K <- sd(InteractionScores_RF$lm_Score_K,na.rm=TRUE) + lm_sd_r <- sd(InteractionScores_RF$lm_Score_r,na.rm=TRUE) + lm_sd_AUC <- sd(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) + + lm_mean_L <- mean(InteractionScores_RF$lm_Score_L,na.rm=TRUE) + lm_mean_K <- mean(InteractionScores_RF$lm_Score_K,na.rm=TRUE) + lm_mean_r <- mean(InteractionScores_RF$lm_Score_r,na.rm=TRUE) + lm_mean_AUC <- mean(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) + + print(paste("Mean RF linear regression score L",lm_mean_L)) + + + InteractionScores_RF$Z_lm_L <- (InteractionScores_RF$lm_Score_L - lm_mean_L)/(lm_sd_L) + InteractionScores_RF$Z_lm_K <- (InteractionScores_RF$lm_Score_K - lm_mean_K)/(lm_sd_K) + InteractionScores_RF$Z_lm_r <- (InteractionScores_RF$lm_Score_r - lm_mean_r)/(lm_sd_r) + InteractionScores_RF$Z_lm_AUC <- (InteractionScores_RF$lm_Score_AUC - lm_mean_AUC)/(lm_sd_AUC) + + + InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$Z_lm_L,decreasing=TRUE),] + InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$NG,decreasing=TRUE),] + write.csv(InteractionScores_RF,paste(outputpath,"RF_ZScores_Interaction.csv",sep=""),row.names=FALSE) + + + for(i in 1:num_genes_RF){ + Gene_Sel <- unique(InteractionScores_RF$OrfRep)[i] + X_ZCalculations <- X_stats_interaction_ALL_RF[X_stats_interaction_ALL_RF$OrfRep == Gene_Sel,] + X_Int_Scores <- 
InteractionScores_RF[InteractionScores_RF$OrfRep == Gene_Sel,] + + p_rf_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-65,65)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=35,label = paste("Avg Zscore =",round(X_Int_Scores$Avg_Zscore_L,2))) + + annotate("text",x=1,y=25,label = paste("lm Zscore =",round(X_Int_Scores$Z_lm_L,2))) + + annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + + theme_Publication() + + p_rf_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-65,65)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) + + #the K plot reports the K-based lm Z-score (Z_lm_K), matching the l, r and AUC panels + annotate("text",x=1,y=25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_K,2))) + + annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-45,label = paste("SM 
=",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + + theme_Publication() + + p_rf_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-0.65,0.65)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) + + annotate("text",x=1,y=0.25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_r,2))) + + annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) + + theme_Publication() + + p_rf_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-6500,6500)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) + + annotate("text",x=1,y=2500,label = paste("lm 
ZScore =",round(X_Int_Scores$Z_lm_AUC,2))) + + annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) + + theme_Publication() + + + + if(i == 1){ + X_stats_interaction_ALL_RF_final <- X_ZCalculations + } + if(i > 1){ + X_stats_interaction_ALL_RF_final <- rbind(X_stats_interaction_ALL_RF_final,X_ZCalculations) + } + } + print("Pass RF ggplot loop") + write.csv(X_stats_interaction_ALL_RF_final,paste(outputpath,"RF_ZScore_Calculations.csv",sep=""),row.names = FALSE) + + + ####### Part 5 - Get Zscores for Gene deletion strains + + + + #get total number of genes for the next loop + num_genes <- length(unique(X2$OrfRep)) + print(num_genes) + + #create the output data.frame containing columns for each deletion strain + InteractionScores <- unique(X2["OrfRep"]) + #InteractionScores$Gene <- unique(X2$Gene) + InteractionScores$Gene <- NA + InteractionScores$Raw_Shift_L <- NA + InteractionScores$Z_Shift_L <- NA + InteractionScores$lm_Score_L <- NA + InteractionScores$Z_lm_L <- NA + InteractionScores$R_Squared_L <- NA + InteractionScores$Sum_Z_Score_L <- NA + InteractionScores$Avg_Zscore_L <- NA + InteractionScores$Raw_Shift_K <- NA + InteractionScores$Z_Shift_K <- NA + InteractionScores$lm_Score_K <- NA + InteractionScores$Z_lm_K <- NA + InteractionScores$R_Squared_K <- NA + InteractionScores$Sum_Z_Score_K <- NA + InteractionScores$Avg_Zscore_K <- NA + InteractionScores$Raw_Shift_r <- NA + InteractionScores$Z_Shift_r <- NA + InteractionScores$lm_Score_r <- NA + InteractionScores$Z_lm_r <- NA + InteractionScores$R_Squared_r <- NA + InteractionScores$Sum_Z_Score_r <- NA + 
InteractionScores$Avg_Zscore_r <- NA + InteractionScores$Raw_Shift_AUC <- NA + InteractionScores$Z_Shift_AUC <- NA + InteractionScores$lm_Score_AUC <- NA + InteractionScores$Z_lm_AUC <- NA + InteractionScores$R_Squared_AUC <- NA + InteractionScores$Sum_Z_Score_AUC <- NA + InteractionScores$Avg_Zscore_AUC <- NA + InteractionScores$NG <- NA + InteractionScores$DB <- NA + InteractionScores$SM <- NA + + for(i in 1:num_genes){ + #get each deletion strain ORF + Gene_Sel <- unique(X2$OrfRep)[i] + #extract only the current deletion strain and its data + X_Gene_Sel <- X2[X2$OrfRep == Gene_Sel,] + + X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean_L = mean(l,na.rm = TRUE), + median_L = median(l,na.rm = TRUE), + sd_L = sd(l,na.rm = TRUE), + se_L = sd_L / sqrt(N-1), + mean_K = mean(K,na.rm = TRUE), + median_K = median(K,na.rm = TRUE), + sd_K = sd(K,na.rm = TRUE), + se_K = sd_K / sqrt(N-1), + mean_r = mean(r,na.rm = TRUE), + median_r = median(r,na.rm = TRUE), + sd_r = sd(r,na.rm = TRUE), + se_r = sd_r / sqrt(N-1), + mean_AUC = mean(AUC,na.rm = TRUE), + median_AUC = median(AUC,na.rm = TRUE), + sd_AUC = sd(AUC,na.rm = TRUE), + se_AUC = sd_AUC / sqrt(N-1), + NG = sum(NG,na.rm=TRUE), + DB= sum(DB,na.rm=TRUE), + SM = sum(SM,na.rm=TRUE) + ) + + + #Get shift vals + #if L is 0, that means the no growth on no drug + #if L is NA at 0, that means the spot was removed due to contamination + #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift + #otherwise calculate shift at no drug conc + if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){ + X_stats_interaction$Raw_Shift_L <- 0 + X_stats_interaction$Raw_Shift_K <- 0 + X_stats_interaction$Raw_Shift_r <- 0 + X_stats_interaction$Raw_Shift_AUC <- 0 + X_stats_interaction$Z_Shift_L <- 0 + X_stats_interaction$Z_Shift_K <- 0 + X_stats_interaction$Z_Shift_r <- 0 + X_stats_interaction$Z_Shift_AUC <- 0 + 
}else{ + X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L + X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K + X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r + X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC + X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1] + X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1] + X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1] + X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1] + } + + + #get WT vals + X_stats_interaction$WT_l <- X_stats_BY_L$mean + X_stats_interaction$WT_K <- X_stats_BY_K$mean + X_stats_interaction$WT_r <- X_stats_BY_r$mean + X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean + + #Get WT SD + X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd + X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd + X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd + X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd + + + #only get scores if there's growth at no drug + if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){ + + #calculate expected values + X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L + X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K + X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r + X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC + + #calculate normalized delta values + X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L + X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K + X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r + 
X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC + + #disregard shift for no growth values in Z score calculation + if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC + + } + #disregard shift for set to max values in Z score calculation + if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l + #only calculate the L value without shift since L is the only adjusted value + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC + + } + + + + #calculate Z score at each concentration + X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) + 
X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) + X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) + X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) + + #get linear model + gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction) + gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction) + gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction) + gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction) + + #get interaction score calculated by linear model and R-squared value for the fit + gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] + r_squared_l <- summary(gene_lm_L)$r.squared + gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] + r_squared_K <- summary(gene_lm_K)$r.squared + gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] + r_squared_r <- summary(gene_lm_r)$r.squared + #AUC score takes both slope and intercept from the AUC model (gene_lm_AUC), like L, K and r do with their own models + gene_interaction_AUC <- MAX_CONC*(gene_lm_AUC$coefficients[2]) + gene_lm_AUC$coefficients[1] + r_squared_AUC <- summary(gene_lm_AUC)$r.squared + + #Get total of non removed values + Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1 + + #report the scores + InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1]) + InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1]) + + InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] + InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] + InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_L + InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- 
(gene_interaction_L-lm_mean_L)/lm_sd_L + InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l + InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE) + InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc) + + + InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] + InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] + InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_K + InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_K-lm_mean_K)/lm_sd_K + InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K + InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE) + InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc) + + InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] + InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] + InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_r + InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_r-lm_mean_r)/lm_sd_r + InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r + InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE) + InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1) + + InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- 
X_stats_interaction$Raw_Shift_AUC[1] + InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] + InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_AUC + InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_AUC-lm_mean_AUC)/lm_sd_AUC + InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC + InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE) + InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1) + } + + if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){ + + #calculate expected values + X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L + X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K + X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r + X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC + + #calculate normalized delta values + X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L + X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K + X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r + X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC + + #disregard shift for missing values in Z score calculatiom + if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){ + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 
1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC + } + #disregard shift for set to max values in Z score calculation + if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l + #only calculate the L value without shift since L is the only adjusted value + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC + + } + + + #calculate Z score at each concentration + X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) + X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) + X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) + X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) + + #NA values for the next part since there's an NA or 0 at the no drug. 
+ gene_lm_L <- NA + gene_lm_K <- NA + gene_lm_r <- NA + #reset the AUC model and scores too, otherwise R_Squared_AUC below reuses the value left over from the previous gene + gene_lm_AUC <- NA + + gene_interaction_L <- NA + r_squared_l <- NA + gene_interaction_K <- NA + r_squared_K <- NA + gene_interaction_r <- NA + r_squared_r <- NA + gene_interaction_AUC <- NA + r_squared_AUC <- NA + + X_stats_interaction$Raw_Shift_L <- NA + X_stats_interaction$Raw_Shift_K <- NA + X_stats_interaction$Raw_Shift_r <- NA + X_stats_interaction$Raw_Shift_AUC <- NA + + X_stats_interaction$Z_Shift_L <- NA + X_stats_interaction$Z_Shift_K <- NA + X_stats_interaction$Z_Shift_r <- NA + X_stats_interaction$Z_Shift_AUC <- NA + + InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1]) + InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1]) + + InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] + InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] + InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l + InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- NA + + InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] + InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] + InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <-NA + InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K + InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- NA + + InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- 
X_stats_interaction$Raw_Shift_r[1] + InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] + InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r + InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- NA + + InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] + InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] + InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC + InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA + } + + if(i == 1){ + X_stats_interaction_ALL <- X_stats_interaction + } + if(i > 1){ + X_stats_interaction_ALL <- rbind(X_stats_interaction_ALL,X_stats_interaction) + } + + InteractionScores$NG[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE) + InteractionScores$DB[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE) + InteractionScores$SM[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE) + + #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int) + + + } + print("Pass Int Calculation loop") + InteractionScores <- InteractionScores[order(InteractionScores$Z_lm_L,decreasing=TRUE),] + InteractionScores <- InteractionScores[order(InteractionScores$NG,decreasing=TRUE),] + df_order_by_OrfRep <- unique(InteractionScores$OrfRep) + 
#X_stats_interaction_ALL <- X_stats_interaction_ALL[order(X_stats_interaction_ALL$NG,decreasing=TRUE),] + write.csv(InteractionScores,paste(outputpath,"ZScores_Interaction.csv",sep=""),row.names=FALSE) + + InteractionScores_deletion_enhancers_L <- InteractionScores[InteractionScores$Avg_Zscore_L >= 2,] + InteractionScores_deletion_enhancers_K <- InteractionScores[InteractionScores$Avg_Zscore_K <= -2,] + InteractionScores_deletion_suppressors_L <- InteractionScores[InteractionScores$Avg_Zscore_L <= -2,] + InteractionScores_deletion_suppressors_K <- InteractionScores[InteractionScores$Avg_Zscore_K >= 2,] + InteractionScores_deletion_enhancers_and_Suppressors_L <- InteractionScores[InteractionScores$Avg_Zscore_L >= 2 | InteractionScores$Avg_Zscore_L <= -2,] + InteractionScores_deletion_enhancers_and_Suppressors_K <- InteractionScores[InteractionScores$Avg_Zscore_K >= 2 | InteractionScores$Avg_Zscore_K <= -2,] + InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- InteractionScores[InteractionScores$Z_lm_L >= 2 & InteractionScores$Avg_Zscore_L <= -2,] + InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- InteractionScores[InteractionScores$Z_lm_L <= -2 & InteractionScores$Avg_Zscore_L >= 2,] + InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- InteractionScores[InteractionScores$Z_lm_K <= -2 & InteractionScores$Avg_Zscore_K >= 2,] + InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- InteractionScores[InteractionScores$Z_lm_K >= 2 & InteractionScores$Avg_Zscore_K <= -2,] + + InteractionScores_deletion_enhancers_L <- InteractionScores_deletion_enhancers_L[!is.na(InteractionScores_deletion_enhancers_L$OrfRep),] + InteractionScores_deletion_enhancers_K <- InteractionScores_deletion_enhancers_K[!is.na(InteractionScores_deletion_enhancers_K$OrfRep),] + InteractionScores_deletion_suppressors_L <- InteractionScores_deletion_suppressors_L[!is.na(InteractionScores_deletion_suppressors_L$OrfRep),] + 
InteractionScores_deletion_suppressors_K <- InteractionScores_deletion_suppressors_K[!is.na(InteractionScores_deletion_suppressors_K$OrfRep),] + InteractionScores_deletion_enhancers_and_Suppressors_L <- InteractionScores_deletion_enhancers_and_Suppressors_L[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L$OrfRep),] + InteractionScores_deletion_enhancers_and_Suppressors_K <- InteractionScores_deletion_enhancers_and_Suppressors_K[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K$OrfRep),] + InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L[!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L$OrfRep),] + InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L[!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L$OrfRep),] + InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K[!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K$OrfRep),] + InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K[!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K$OrfRep),] + + write.csv(InteractionScores_deletion_enhancers_L,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_L.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_enhancers_K,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_K.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_suppressors_L,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_L.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_suppressors_K,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_K.csv",sep=""),row.names=FALSE) + 
write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L,paste(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L,paste(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K,paste(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K,paste(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv",sep=""),row.names=FALSE) + + #get enhancers and suppressors for linear regression + InteractionScores_deletion_enhancers_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2,] + InteractionScores_deletion_enhancers_K_lm <- InteractionScores[InteractionScores$Z_lm_K <= -2,] + InteractionScores_deletion_suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L <= -2,] + InteractionScores_deletion_suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2,] + InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2 | InteractionScores$Z_lm_L <= -2,] + InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2 | InteractionScores$Z_lm_K <= -2,] + + InteractionScores_deletion_enhancers_L_lm <- InteractionScores_deletion_enhancers_L_lm[!is.na(InteractionScores_deletion_enhancers_L_lm$OrfRep),] + InteractionScores_deletion_enhancers_K_lm <- 
InteractionScores_deletion_enhancers_K_lm[!is.na(InteractionScores_deletion_enhancers_K_lm$OrfRep),] + InteractionScores_deletion_suppressors_L_lm <- InteractionScores_deletion_suppressors_L_lm[!is.na(InteractionScores_deletion_suppressors_L_lm$OrfRep),] + InteractionScores_deletion_suppressors_K_lm <- InteractionScores_deletion_suppressors_K_lm[!is.na(InteractionScores_deletion_suppressors_K_lm$OrfRep),] + InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores_deletion_enhancers_and_Suppressors_L_lm[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L_lm$OrfRep),] + InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores_deletion_enhancers_and_Suppressors_K_lm[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K_lm$OrfRep),] + + write.csv(InteractionScores_deletion_enhancers_L_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_L_lm.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_enhancers_K_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_K_lm.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_suppressors_L_lm,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_L_lm.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_suppressors_K_lm,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_K_lm.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv",sep=""),row.names=FALSE) + + + write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) + print('ln 1570 write StudyInfo.csv after ') + #write.table(Labels,file=paste("../Code/StudyInfo.txt"),sep="\t",row.names = FALSE) + + for(i in 1:num_genes){ + Gene_Sel <- 
unique(InteractionScores$OrfRep)[i] + X_ZCalculations <- X_stats_interaction_ALL[X_stats_interaction_ALL$OrfRep == Gene_Sel,] + X_Int_Scores <- InteractionScores[InteractionScores$OrfRep == Gene_Sel,] + + p_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-65,65)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_L,2))) + + annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_L,2))) + + annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + + theme_Publication() + + p_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-65,65)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) + + annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_K,2))) + + annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) 
+ + annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + + theme_Publication() + + p_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-0.65,0.65)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) + + annotate("text",x=1,y=0.25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_r,2))) + + annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) + + theme_Publication() + + p_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-6500,6500)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=3500,label = 
paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) + + annotate("text",x=1,y=2500,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_AUC,2))) + + annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) + + theme_Publication() + + + + if(i == 1){ + X_stats_interaction_ALL_final <- X_ZCalculations + } + if(i > 1){ + X_stats_interaction_ALL_final <- rbind(X_stats_interaction_ALL_final,X_ZCalculations) + } + } + print("Pass Int ggplot loop") + write.csv(X_stats_interaction_ALL_final,paste(outputpath,"ZScore_Calculations.csv",sep=""),row.names = FALSE) + + + + + + + Blank <- ggplot(X2_RF) + geom_blank() + + pdf(paste(outputpath,"InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE) + + X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise, + mean_L = mean(l,na.rm=TRUE), + median_L = median(l,na.rm=TRUE), + max_L = max(l,na.rm=TRUE), + min_L = min(l,na.rm=TRUE), + sd_L = sd(l,na.rm=TRUE), + mean_K = mean(K,na.rm=TRUE), + median_K = median(K,na.rm=TRUE), + max_K = max(K,na.rm=TRUE), + min_K = min(K,na.rm=TRUE), + sd_K = sd(K,na.rm=TRUE), + mean_r = mean(r,na.rm=TRUE), + median_r = median(r,na.rm=TRUE), + max_r = max(r,na.rm=TRUE), + min_r = min(r,na.rm=TRUE), + sd_r = sd(r,na.rm=TRUE), + mean_AUC = mean(AUC,na.rm=TRUE), + median_AUC = median(AUC,na.rm=TRUE), + max_AUC = max(AUC,na.rm=TRUE), + min_AUC = min(AUC,na.rm=TRUE), + sd_AUC = sd(AUC,na.rm=TRUE), + NG = sum(NG,na.rm=TRUE), + DB = sum(DB,na.rm=TRUE), + SM = sum(SM,na.rm=TRUE) + ) + + + L_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,l)) + 
geom_point(position="jitter",size=1) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + + annotate("text",x=-0.25,y=10,label="NG") + + annotate("text",x=-0.25,y=5,label="DB") + + annotate("text",x=-0.25,y=0,label="SM") + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10,label=X_stats_X2_RF$NG) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=5,label=X_stats_X2_RF$DB) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=0,label=X_stats_X2_RF$SM) + + theme_Publication() + + K_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,K)) + geom_point(position="jitter",size=1) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(-20,160)) + + annotate("text",x=-0.25,y=-5,label="NG") + + annotate("text",x=-0.25,y=-12.5,label="DB") + + annotate("text",x=-0.25,y=-20,label="SM") + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-5,label=X_stats_X2_RF$NG) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-12.5,label=X_stats_X2_RF$DB) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-20,label=X_stats_X2_RF$SM) + + theme_Publication() + + R_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,r)) + geom_point(position="jitter",size=1) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) 
mean(x) + sd(x), + geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + + annotate("text",x=-0.25,y=.9,label="NG") + + annotate("text",x=-0.25,y=.8,label="DB") + + annotate("text",x=-0.25,y=.7,label="SM") + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.9,label=X_stats_X2_RF$NG) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.8,label=X_stats_X2_RF$DB) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.7,label=X_stats_X2_RF$SM) + + theme_Publication() + + AUC_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,AUC)) + geom_point(position="jitter",size=1) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + + annotate("text",x=-0.25,y=11000,label="NG") + + annotate("text",x=-0.25,y=10000,label="DB") + + annotate("text",x=-0.25,y=9000,label="SM") + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=11000,label=X_stats_X2_RF$NG) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10000,label=X_stats_X2_RF$DB) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=9000,label=X_stats_X2_RF$SM) + + theme_Publication() + + + L_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)) + geom_boxplot() + + scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + 
coord_cartesian(ylim=c(0,160)) + + theme_Publication() + + K_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)) + geom_boxplot() + + scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + + theme_Publication() + + r_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)) + geom_boxplot() + + scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + + theme_Publication() + + AUC_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)) + geom_boxplot() + + scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + + theme_Publication() + + + grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2) + grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2) + + + + #plot the references + #grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4) + #grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2) + + #loop for grid arrange 4x3 + j <- rep(1,((num_genes)/3)-1) + for(n in 1:length(j)){ + j[n+1] <- n*3 + 1 + } + #loop for printing each plot + num <- 0 + for(m in 1:(round((num_genes)/3)-1)){ + num <- j[m] + grid.arrange(p_l[[num]],p_K[[num]],p_r[[num]],p_AUC[[num]],p_l[[num+1]],p_K[[num+1]],p_r[[num+1]],p_AUC[[num+1]],p_l[[num+2]],p_K[[num+2]],p_r[[num+2]],p_AUC[[num+2]],ncol=4,nrow=3) + #grid.arrange(p_l[[364]],p_K[[364]],p_r[[364]],p_l[[365]],p_K[[365]],p_r[[365]],p_l[[366]],p_K[[366]],p_r[[366]],ncol=3,nrow=3) + + + #p1[[num+3]],p_K[[num+3]],p_r[[num+3]],p1[[num+4]],p_K[[num+4]],p_r[[num+4]] + } + 
# ---- Remaining pages of InteractionPlots.pdf -------------------------------
# The paging loop above leaves `num` at the first gene index of the last full
# page it drew, so genes num+3 .. num_genes are still pending. Every gene
# occupies one row (L, K, r, AUC panels); a page holds three rows and unused
# rows are padded with `Blank`. This replaces five copy-pasted
# `if(total_num == k)` branches, which drew the same pages for remainders 1-5
# and silently dropped the plots for any other positive remainder.
total_num <- num_genes - (num + 2)
if (total_num > 0) {
  pending_genes <- seq(num + 3, num_genes)
  for (first_pos in seq(1, total_num, by = 3)) {
    page_genes <- pending_genes[first_pos:min(first_pos + 2, total_num)]
    page_panels <- list()
    for (gene_idx in page_genes) {
      page_panels <- c(page_panels,
                       list(p_l[[gene_idx]], p_K[[gene_idx]], p_r[[gene_idx]], p_AUC[[gene_idx]]))
    }
    # pad to the fixed 4 x 3 grid so partially filled pages keep their layout
    page_panels <- c(page_panels, rep(list(Blank), 12 - length(page_panels)))
    do.call(grid.arrange, c(page_panels, list(ncol = 4, nrow = 3)))
  }
}
dev.off()


# ---- RF_InteractionPlots.pdf: summary pages ---------------------------------
# The device opened here stays open after this section; the per-gene RF pages
# are drawn and the device is closed further down the script.
pdf(paste(outputpath,"RF_InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE)

# Per-concentration summary of X2_RF: mean/median/max/min/sd of l, K, r and
# AUC, plus the summed NG, DB and SM columns that are printed on the scatter
# panels below.
X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise,
                       mean_L = mean(l,na.rm=TRUE),
                       median_L = median(l,na.rm=TRUE),
                       max_L = max(l,na.rm=TRUE),
                       min_L = min(l,na.rm=TRUE),
                       sd_L = sd(l,na.rm=TRUE),
                       mean_K = mean(K,na.rm=TRUE),
                       median_K = median(K,na.rm=TRUE),
                       max_K = max(K,na.rm=TRUE),
                       min_K = min(K,na.rm=TRUE),
                       sd_K = sd(K,na.rm=TRUE),
                       mean_r = mean(r,na.rm=TRUE),
                       median_r = median(r,na.rm=TRUE),
                       max_r = max(r,na.rm=TRUE),
                       min_r = min(r,na.rm=TRUE),
                       sd_r = sd(r,na.rm=TRUE),
                       mean_AUC = mean(AUC,na.rm=TRUE),
                       median_AUC = median(AUC,na.rm=TRUE),
                       max_AUC = max(AUC,na.rm=TRUE),
                       min_AUC = min(AUC,na.rm=TRUE),
                       sd_AUC = sd(AUC,na.rm=TRUE),
                       NG = sum(NG,na.rm=TRUE),
                       DB = sum(DB,na.rm=TRUE),
                       SM = sum(SM,na.rm=TRUE)
)

# Adds the shared layers of the four RF scatter panels to `base_plot`:
# jittered points, a red mean +/- 1 SD error bar and mean point per
# concentration, the concentration axis, and the NG/DB/SM sums written at the
# three heights given in `count_y` (NG, DB, SM order).
#   base_plot  - ggplot(X2_RF, aes(Conc_Num_Factor, <parameter>))
#   title_text - title suffix, pasted after `s`
#   y_limits   - visible y range handed to coord_cartesian()
#   count_y    - numeric(3): y positions of the NG, DB and SM label rows
rf_scatter_panel <- function(base_plot, title_text, y_limits, count_y) {
  conc_positions <- unique(X2_RF$Conc_Num_Factor)
  base_plot + geom_point(position="jitter",size=1) +
    stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
                 geom = "errorbar",color="red") +
    stat_summary(fun.y = mean, geom = "point",color="red") +
    scale_x_continuous(name = unique(X$Drug[1]),breaks = conc_positions,labels = as.character(unique(X2_RF$Conc_Num))) +
    ggtitle(paste(s,title_text, sep = " ")) + coord_cartesian(ylim=y_limits) +
    annotate("text",x=-0.25,y=count_y[1],label="NG") +
    annotate("text",x=-0.25,y=count_y[2],label="DB") +
    annotate("text",x=-0.25,y=count_y[3],label="SM") +
    annotate("text",x=conc_positions,y=count_y[1],label=X_stats_X2_RF$NG) +
    annotate("text",x=conc_positions,y=count_y[2],label=X_stats_X2_RF$DB) +
    annotate("text",x=conc_positions,y=count_y[3],label=X_stats_X2_RF$SM) +
    theme_Publication()
}

# Adds the shared layers of the four RF box-plot panels to `base_plot`
# (one box per concentration level). Arguments as for rf_scatter_panel().
rf_box_panel <- function(base_plot, title_text, y_limits) {
  base_plot + geom_boxplot() +
    scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
    ggtitle(paste(s,title_text, sep = " ")) + coord_cartesian(ylim=y_limits) +
    theme_Publication()
}

L_Stats   <- rf_scatter_panel(ggplot(X2_RF,aes(Conc_Num_Factor,l)),   "Scatter RF for L with SD",   c(0,130),    c(10,5,0))
K_Stats   <- rf_scatter_panel(ggplot(X2_RF,aes(Conc_Num_Factor,K)),   "Scatter RF for K with SD",   c(-20,160),  c(-5,-12.5,-20))
R_Stats   <- rf_scatter_panel(ggplot(X2_RF,aes(Conc_Num_Factor,r)),   "Scatter RF for r with SD",   c(0,1),      c(.9,.8,.7))
AUC_Stats <- rf_scatter_panel(ggplot(X2_RF,aes(Conc_Num_Factor,AUC)), "Scatter RF for AUC with SD", c(0,12500),  c(11000,10000,9000))

# NOTE(review): the box-plot titles reuse the "Scatter ... with SD" wording of
# the scatter panels; left unchanged here - confirm whether that is intended.
L_Stats_Box   <- rf_box_panel(ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)),   "Scatter RF for L with SD",   c(0,130))
K_Stats_Box   <- rf_box_panel(ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)),   "Scatter RF for K with SD",   c(0,160))
r_Stats_Box   <- rf_box_panel(ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)),   "Scatter RF for r with SD",   c(0,1))
# Was ylim=c(12000,0): limits given high-to-low and a different ceiling from
# the AUC scatter panel above (0-12500). Aligned with that range.
AUC_Stats_Box <- rf_box_panel(ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)), "Scatter RF for AUC with SD", c(0,12500))


# Page 1: scatter summaries, page 2: box-plot summaries (2 x 2 each)
grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2)
grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2)



#plot the references
#grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4)
#grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2)

# ---- Per-gene RF pages (4 columns x 3 rows) ---------------------------------
# Each gene fills one row with its L, K, r and AUC panels; three genes go on a
# page and the unused rows of the last page are padded with `Blank`.
#
# This replaces the index vector `j`, the loop over
# 1:(round(num_genes_RF/3)-1) and the five `if(total_num == k)` remainder
# branches. For 5 or more genes the pages produced are the same. The old form
# failed for small gene counts: with 4 or fewer genes the range became 1:0 and
# indexed the plot lists with an empty subscript, and with fewer than 3 genes
# rep() received a negative `times`.
for (page_no in seq_len(ceiling(num_genes_RF / 3))) {
  first_gene <- (page_no - 1) * 3 + 1
  last_gene <- min(page_no * 3, num_genes_RF)
  page_panels <- list()
  for (gene_idx in first_gene:last_gene) {
    page_panels <- c(page_panels,
                     list(p_rf_l[[gene_idx]], p_rf_K[[gene_idx]], p_rf_r[[gene_idx]], p_rf_AUC[[gene_idx]]))
  }
  # pad to the fixed 4 x 3 grid so partially filled pages keep their layout
  page_panels <- c(page_panels, rep(list(Blank), 12 - length(page_panels)))
  do.call(grid.arrange, c(page_panels, list(ncol = 4, nrow = 3)))
}
dev.off()

#print rank plots for L and K gene interactions

# Work on a copy of InteractionScores in which missing average Z scores are
# replaced by 0.001, then rank every gene by that score. This adds the
# columns L_Rank, K_Rank, r_Rank and AUC_Rank (in that order).
#
# The NA fill indexes the column vector directly. The previous form,
#   df[is.na(df$col),]$col <- 0.001
# stops with "replacement has 1 row, data has 0" whenever the column contains
# no NA, because the assignment is then applied to a zero-row data frame.
InteractionScores_AdjustMissing <- InteractionScores
for (growth_param in c("L","K","r","AUC")) {
  z_col <- paste("Avg_Zscore_", growth_param, sep = "")
  rank_col <- paste(growth_param, "_Rank", sep = "")
  z_values <- InteractionScores_AdjustMissing[[z_col]]
  z_values[is.na(z_values)] <- 0.001
  InteractionScores_AdjustMissing[[z_col]] <- z_values
  InteractionScores_AdjustMissing[[rank_col]] <- rank(z_values)
}

#
InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_L),]$Z_lm_L <- 0.001 + InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_K),]$Z_lm_K <- 0.001 + InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_r),]$Z_lm_r <- 0.001 + InteractionScores_AdjustMissing[is.na(InteractionScores_AdjustMissing$Z_lm_AUC),]$Z_lm_AUC <- 0.001 + + InteractionScores_AdjustMissing$L_Rank_lm <- NA + InteractionScores_AdjustMissing$K_Rank_lm <- NA + InteractionScores_AdjustMissing$r_Rank_lm <- NA + InteractionScores_AdjustMissing$AUC_Rank_lm <- NA + + InteractionScores_AdjustMissing$L_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_L) + InteractionScores_AdjustMissing$K_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_K) + InteractionScores_AdjustMissing$r_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_r) + InteractionScores_AdjustMissing$AUC_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_AUC) + + + + Rank_L_1SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 1,])[1])) + + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -1,])[1])) + + theme_Publication() + + Rank_L_2SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. 
Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 2,])[1])) + + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -2,])[1])) + + theme_Publication() + + Rank_L_3SD <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L >= 3,])[1])) + + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_L <= -3,])[1])) + + theme_Publication() + + + Rank_K_1SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. 
Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -1,])[1])) + + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 1,])[1])) + + theme_Publication() + + Rank_K_2SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -2,])[1])) + + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 2,])[1])) + + theme_Publication() + + Rank_K_3SD <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K <= -3,])[1])) + + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Avg_Zscore_K >= 3,])[1])) + + theme_Publication() + + + Rank_L_1SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_L_2SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_L_3SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. 
Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + + Rank_K_1SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_K_2SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_K_3SD_notext <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + + pdf(paste(outputpath,"RankPlots.pdf",sep=""),width = 18, height = 12, onefile = TRUE) + + grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2) + grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2) + + dev.off() + + + Rank_L_1SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 1,])[1])) + + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -1,])[1])) + + theme_Publication() + + Rank_L_2SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. 
Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 2,])[1])) + + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -2,])[1])) + + theme_Publication() + + Rank_L_3SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L >= 3,])[1])) + + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_L <= -3,])[1])) + + theme_Publication() + + + Rank_K_1SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. 
Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -1,])[1])) + + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 1,])[1])) + + theme_Publication() + + Rank_K_2SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -2,])[1])) + + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 2,])[1])) + + theme_Publication() + + Rank_K_3SD_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K <= -3,])[1])) + + annotate("text",x=(dim(InteractionScores_AdjustMissing)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(InteractionScores_AdjustMissing[InteractionScores_AdjustMissing$Z_lm_K >= 3,])[1])) + + theme_Publication() + + + Rank_L_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_L_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_L_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. 
Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + + Rank_K_1SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_K_2SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_K_3SD_notext_lm <- ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + pdf(paste(outputpath,"RankPlots_lm.pdf",sep=""),width = 18, height = 12, onefile = TRUE) + + grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2) + grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2) + + dev.off() + + + +
  # Working copy of InteractionScores restricted to rows that have at least one
  # of the two L scores (Z_lm_L or Avg_Zscore_L). A row may still carry NA in
  # the other column, so every comparison below can evaluate to NA.
  X_NArm <- InteractionScores[!is.na(InteractionScores$Z_lm_L) | !is.na(InteractionScores$Avg_Zscore_L), ]

  # Classify each row by whether the lm z-score and the average z-score for L
  # agree at the 2 SD threshold. Every row starts as "No Effect"; the rules are
  # applied in order and a later rule overwrites an earlier one, so the two
  # "opposite direction" rules at the end take precedence over the "only" rules.
  #
  # which() is used for the row index on purpose: it drops NA comparisons and
  # tolerates zero matches. Indexing the data frame rows with a logical vector
  # that contains NA makes the assignment fail, and the previous try() wrapper
  # hid that failure, leaving the affected label unassigned for all rows.
  # Rows whose comparison is NA simply keep the label they already have.
  #
  # The "only" rules use strict bounds on the non-significant score so that a
  # row sitting exactly on the threshold in both scores stays "... Both".
  X_NArm$Overlap <- "No Effect"
  X_NArm$Overlap[which(X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L >= 2)] <- "Deletion Enhancer Both"
  X_NArm$Overlap[which(X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L <= -2)] <- "Deletion Suppressor Both"
  X_NArm$Overlap[which(X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L < 2)] <- "Deletion Enhancer lm only"
  X_NArm$Overlap[which(X_NArm$Z_lm_L < 2 & X_NArm$Avg_Zscore_L >= 2)] <- "Deletion Enhancer Avg Zscore only"
  X_NArm$Overlap[which(X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L > -2)] <- "Deletion Suppressor lm only"
  X_NArm$Overlap[which(X_NArm$Z_lm_L > -2 & X_NArm$Avg_Zscore_L <= -2)] <- "Deletion Suppressor Avg Zscore only"
  X_NArm$Overlap[which(X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= -2)] <- "Deletion Enhancer lm, Deletion Suppressor Avg Z score"
  X_NArm$Overlap[which(X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= 2)] <- "Deletion Suppressor lm, Deletion Enhancer Avg Z score"

  # Linear fit of the lm z-score against the average z-score for each CPP;
  # the summary objects are kept for their r.squared value, which is printed
  # on the scatter plots that follow.
  get_lm_L <- lm(X_NArm$Z_lm_L~X_NArm$Avg_Zscore_L)
  L_lm <- summary(get_lm_L)

  get_lm_K <- lm(X_NArm$Z_lm_K~X_NArm$Avg_Zscore_K)
+ K_lm <- summary(get_lm_K) + + get_lm_r <- lm(X_NArm$Z_lm_r~X_NArm$Avg_Zscore_r) + r_lm <- summary(get_lm_r) + + get_lm_AUC <- lm(X_NArm$Z_lm_AUC~X_NArm$Avg_Zscore_AUC) + AUC_lm <- summary(get_lm_AUC) + + pdf(paste(outputpath,"Avg_Zscore_vs_lm_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) + + print(ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Avg Zscore vs lm L") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(Avg_Zscore_K,Z_lm_K)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Avg Zscore vs lm K") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(K_lm$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(Avg_Zscore_r,Z_lm_r)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Avg Zscore vs lm r") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(r_lm$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(Avg_Zscore_AUC,Z_lm_AUC)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Avg Zscore vs lm AUC") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(AUC_lm$r.squared,2))) + theme_Publication_legend_right()) + + dev.off() + + + lm_v_Zscore_L <- 
ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L,ORF=OrfRep,Gene=Gene,NG=NG,SM=SM,DB=DB)) + geom_point(aes(color=Overlap),shape=3) + + geom_smooth(method = "lm",color=1) + ggtitle("Avg Zscore vs lm L") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right() + + pgg <- ggplotly(lm_v_Zscore_L) + #pgg + plotly_path <- paste(getwd(),"/",outputpath,"Avg_Zscore_vs_lm_NA_rm.html",sep="") + saveWidget(pgg, file=plotly_path, selfcontained =TRUE) + + X_NArm$L_Rank <- rank(X_NArm$Avg_Zscore_L) + X_NArm$K_Rank <- rank(X_NArm$Avg_Zscore_K) + X_NArm$r_Rank <- rank(X_NArm$Avg_Zscore_r) + X_NArm$AUC_Rank <- rank(X_NArm$Avg_Zscore_AUC) + + X_NArm$L_Rank_lm <- rank(X_NArm$Z_lm_L) + X_NArm$K_Rank_lm <- rank(X_NArm$Z_lm_K) + X_NArm$r_Rank_lm <- rank(X_NArm$Z_lm_r) + X_NArm$AUC_Rank_lm <- rank(X_NArm$Z_lm_AUC) + + #get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 + get_lm_L2 <- lm(X_NArm$L_Rank_lm~X_NArm$L_Rank) + L_lm2 <- summary(get_lm_L2) + + get_lm_K2 <- lm(X_NArm$K_Rank_lm~X_NArm$K_Rank) + K_lm2 <- summary(get_lm_K2) + + get_lm_r2 <- lm(X_NArm$r_Rank_lm~X_NArm$r_Rank) + r_lm2 <- summary(get_lm_r2) + + get_lm_AUC2 <- lm(X_NArm$AUC_Rank_lm~X_NArm$AUC_Rank) + AUC_lm2 <- summary(get_lm_AUC2) + + num_genes_NArm2 <- (dim(X_NArm)[1])/2 + + pdf(paste(outputpath,"Avg_Zscore_vs_lm_ranked_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) + + print(ggplot(X_NArm,aes(L_Rank,L_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Rank Avg Zscore vs lm L") + + annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(L_lm2$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(K_Rank,K_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + 
ggtitle("Rank Avg Zscore vs lm K") + + annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(K_lm2$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(r_Rank,r_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Rank Avg Zscore vs lm r") + + annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(r_lm2$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(AUC_Rank,AUC_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Rank of Avg Zscore vs lm AUC") + + annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(AUC_lm2$r.squared,2))) + theme_Publication_legend_right()) + + + + dev.off() + + + + Rank_L_1SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 1,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -1,])[1])) + + theme_Publication() + + Rank_L_2SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. 
Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 2,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -2,])[1])) + + theme_Publication() + + Rank_L_3SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 3,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -3,])[1])) + + theme_Publication() + + + Rank_K_1SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. 
Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -1,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 1,])[1])) + + theme_Publication() + + Rank_K_2SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -2,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 2,])[1])) + + theme_Publication() + + Rank_K_3SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -3,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 3,])[1])) + + theme_Publication() + + + Rank_L_1SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_L_2SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_L_3SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. 
Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + + Rank_K_1SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_K_2SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_K_3SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + + pdf(paste(outputpath,"RankPlots_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) + + grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2) + grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2) + + dev.off() + + + Rank_L_1SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 1,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -1,])[1])) + + theme_Publication() + + Rank_L_2SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. 
Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 2,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -2,])[1])) + + theme_Publication() + + Rank_L_3SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 3,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -3,])[1])) + + theme_Publication() + + + Rank_K_1SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. 
Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -1,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 1,])[1])) + + theme_Publication() + + Rank_K_2SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -2,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 2,])[1])) + + theme_Publication() + + Rank_K_3SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -3,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 3,])[1])) + + theme_Publication() + + + Rank_L_1SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_L_2SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_L_3SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. 
Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + + Rank_K_1SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_K_2SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_K_3SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + pdf(paste(outputpath,"RankPlots_lm_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) + + grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2) + grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2) + + dev.off() + +} + + +
# ---------------------------------------------------------------------------
# Correlation_CPPs.pdf
#
# Pairwise comparison of the four interaction z-scores held in X_NArm
# (Z_lm_L, Z_lm_K, Z_lm_r, Z_lm_AUC). The PDF gets twelve pages:
#   pages 1-6  : grey scatter + linear fit line, one page per pair
#   pages 7-12 : the same grey scatter with the rows of InteractionScores_RF
#                that have a non-NA Z_lm_L drawn on top in cyan
# Every page is annotated at (0, 0) with the R-squared of the pair's linear
# model, rounded to three decimals.
#
# Reads : X_NArm, InteractionScores_RF, outputpath,
#         theme_Publication_legend_right() (all defined outside this section)
# Writes: <outputpath>Correlation_CPPs.pdf
# ---------------------------------------------------------------------------

# Linear model (y ~ x) and its summary for each of the six score pairs.
get_lm_1 <- lm(X_NArm$Z_lm_K~X_NArm$Z_lm_L)
L_lm_1 <- summary(get_lm_1)

get_lm_2 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_L)
L_lm_2 <- summary(get_lm_2)

get_lm_3 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_L)
L_lm_3 <- summary(get_lm_3)

get_lm_4 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_K)
L_lm_4 <- summary(get_lm_4)

get_lm_5 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_K)
L_lm_5 <- summary(get_lm_5)

get_lm_6 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_r)
L_lm_6 <- summary(get_lm_6)

# One entry per page: axis names used in the title/labels, the x/y mapping,
# and the model summary that supplies R-squared. The order is the page order.
cpp_pairs <- list(
  list(x = "L", y = "K",   mapping = aes(Z_lm_L, Z_lm_K),   fit = L_lm_1),
  list(x = "L", y = "r",   mapping = aes(Z_lm_L, Z_lm_r),   fit = L_lm_2),
  list(x = "L", y = "AUC", mapping = aes(Z_lm_L, Z_lm_AUC), fit = L_lm_3),
  list(x = "K", y = "r",   mapping = aes(Z_lm_K, Z_lm_r),   fit = L_lm_4),
  list(x = "K", y = "AUC", mapping = aes(Z_lm_K, Z_lm_AUC), fit = L_lm_5),
  list(x = "r", y = "AUC", mapping = aes(Z_lm_r, Z_lm_AUC), fit = L_lm_6)
)

# Axis text sizes and grid removal shared by all twelve pages.
cpp_axis_theme <- theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                        axis.text.x = element_text(size=16), axis.title.x = element_text(size=18),
                        axis.text.y = element_text(size=16), axis.title.y = element_text(size=18))

# Build one page of the report.
#   pair    - one element of cpp_pairs
#   overlay - the layer drawn on top of the grey scatter (fit line or
#             highlighted points); it inherits the x/y mapping of the plot
# Returns the ggplot object; the caller is responsible for printing it.
cpp_correlation_plot <- function(pair, overlay) {
  ggplot(X_NArm, pair$mapping) +
    geom_point(shape=3, color="gray70") +
    overlay +
    ggtitle(paste0("Interaction ", pair$x, " vs. Interaction ", pair$y)) +
    xlab(paste("z-score", pair$x)) + ylab(paste("z-score", pair$y)) +
    annotate("text", x=0, y=0, label = paste("R-squared = ", round(pair$fit$r.squared, 3))) +
    theme_Publication_legend_right() +
    cpp_axis_theme
}

pdf(file=paste(outputpath,"Correlation_CPPs.pdf",sep=""),width = 10, height = 7, onefile = TRUE)

# Plots are print()ed explicitly so the pages are written no matter how the
# script is executed (auto-printing only happens for top-level expressions).
# The device is closed in `finally` so a failing page cannot leave the PDF open.
tryCatch({
  for (pair in cpp_pairs) {
    print(cpp_correlation_plot(pair, geom_smooth(method="lm", color="tomato3")))
  }

  InteractionScores_RF2 <- InteractionScores_RF[!is.na(InteractionScores_RF$Z_lm_L),]
  for (pair in cpp_pairs) {
    print(cpp_correlation_plot(pair, geom_point(data=InteractionScores_RF2, color="cyan")))
  }
}, finally = dev.off())


#write.csv(Labels,file=paste("../Code/Parameters.csv"),row.names = FALSE)
timestamp()

#BoneYard***********************************************
#I'm thinking this parameter needs to be saved somewhere "permanent" for the record so outputs can be reproduced.
#take this out of the Arguments. In Matlab I could for future in .mat file. Maybe I could save the SD Args[2] as part of the StudyInfo.txt.
#Corruptible but better than nothing.
#if(is.na(Args[2])){
# std=3
#}else {
# std= Arg[2]
#Delta_Background_sdFactor <- 2 #Args[3]
#DelBGFactr <- as.numeric(Delta_Background_sdFactor)
#}

diff --git a/workflow/.old/templates/qhtcp/Exp2/backups/InteractTemplateB4Prompt4SDinput.R b/workflow/.old/templates/qhtcp/Exp2/backups/InteractTemplateB4Prompt4SDinput.R new file mode 100644 index 00000000..3b11777c --- /dev/null +++ b/workflow/.old/templates/qhtcp/Exp2/backups/InteractTemplateB4Prompt4SDinput.R @@ -0,0 +1,2702 @@ +#Based on InteractionTemplate.R which is based on Sean Santose's Interaction_V5 script. +#Adapt SS For Structured Data storage but using command line scripts +###Set up the required libraries, call required plot theme elements and set up the command line arguments +library("ggplot2") +library("plyr") +library("extrafont") +library("gridExtra") +library("gplots") +library("RColorBrewer") +library("stringr") +#library("gdata") +library(plotly) +library(htmlwidgets) + +Args <- commandArgs(TRUE) +input_file <- Args[1] #"!!Results_17_0827_yor1null-rpl12anull misLabeledAsFrom MI 17_0919_yor1-curated.txt" #Args[1] #Arg 1 #"!!ResultsStd_JS 19_1224_HLEG_P53.txt" is the !!results ...
.txt + +#Path to Output Directory +W=getwd() #R is F'd up, Can't use, Any legitamate platfold could build out dirs from this +outDir <- "ZScores/" #"Args[2] #paste0(W,"/ZScores/") +subDir <- outDir #Args[2] + +if (file.exists(subDir)){ + outputpath <- subDir +} else { + dir.create(file.path(subDir)) +} + +if (file.exists(paste(subDir,"QC/",sep=""))){ + outputpath_QC <- paste(subDir,"QC/",sep="") +} else { + dir.create(file.path(paste(subDir,"QC/",sep=""))) + outputpath_QC <- paste(subDir,"QC/",sep="") +} +#define the output path (formerly the second argument from Rscript) +outputpath <- outDir + +#Set Args[2] the Background contamination noise filter as a function of standard deviation +#Sean recommends 3 or 5 SD factor. +#Capture Exp_ number,use it to Save Args[2]{std}to Labels field and then Write to Labels to studyInfo.txt for future reference +Labels <- read.csv(file= "../Code/StudyInfo.csv",stringsAsFactors = FALSE) #,sep= ",") +print("Be sure to include Argument 2 the Bacground noise filter standard deviation i.e., 3 or 5 per Sean") +std= as.numeric(Args[2]) +expNumber<- as.numeric(sub("^.*?(\\d+)$", "\\1", getwd())) +Labels[expNumber,3]= as.numeric(std) +Delta_Background_sdFactor <- std +DelBGFactr <- as.numeric(Delta_Background_sdFactor) +#Write Background SD value to studyInfo.txt file +#write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) +write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) +print('ln 50 write StudyInfo.csv ') +#write.table(Labels,file=paste(outputpath,"StudyInfo.txt"),sep = "\t",row.names = FALSE) + +#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +#++++++BEGIN USER DATA SELECTION SECTION+++++++++++++++++++++++++++++++++++++++++++++++++ + + +#read in the data +X <- read.delim(input_file,skip=2,as.is=T,row.names=1,strip.white=TRUE) +X <- X[!(X[[1]]%in%c("","Scan")),] +#X <- X[!(X[[1]]%in%c(61:76)),] #Remove dAmp plates which are Scans 61 thru 76 + +#X <- 
X[which(X$Specifics == "WT"),] + +X_length <- length(X[1,]) +X_end <- length(X[1,]) - 2 +X <- X[,c(1:46,X_end:X_length)] + + +#use numeric data to perform operations +X$Col <- as.numeric(X$Col) +X$Row <- as.numeric(X$Row) +X$l <- as.numeric(X$l) +X$K <- as.numeric(X$K) +X$r <- as.numeric(X$r) +X$Scan <- as.numeric(X$Scan) +X$AUC <- as.numeric(X$AUC) +X$LstBackgrd <- as.numeric(X$LstBackgrd) +X$X1stBackgrd <- as.numeric(X$X1stBackgrd) + +#set the OrfRep to YDL227C for the ref data +X[X$ORF == "YDL227C",]$OrfRep <- "YDL227C" +#Sean removes the Doxycyclin at 0.0ug.mL so that only the Oligomycin series with Doxycyclin of 0.12ug/mL are used. +#That is the first DM plates are removed from the data set with the following. +X <- X[X$Conc1 != "0ug/mL",] #This occurs only for Exp1 and Exp2 and so doesn't have any effect on Exp3&4 + + +#Mert placed the"bad_spot" text in the ORF col. for particular spots in the RF1 and RF2 plates. +#This code removes those spots from the data set used for the interaction analysis. Dr.Hartman feels that these donot effect Zscores significantly and so "non-currated" files were used. 
+#try(X <- X[X$ORF != "bad_spot",])
+#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++=
+
+#get total number of drug concentrations
+#NOTE(review): X$Conc needs a column named Conc (or one that `$` can partially match) - confirm against the results file header
+Total_Conc_Nums <- length(unique(X$Conc))
+
+#numextract: pull the first numeric token out of each element of a character vector
+#(ie to get the numeric drug conc from strings that mix characters and numbers).
+#The pattern accepts optional leading "-" characters, one or more digits, optional "." characters and trailing digits.
+#  string: character vector to search
+#  returns: character vector of the same length holding the matched text (str_extract gives NA where nothing matches)
+numextract <- function(string){
+  str_extract(string, "\\-*\\d+\\.*\\d*")
+}
+
+#generate a new column with the numeric drug concs
+X$Conc_Num <- as.numeric(numextract(X$Conc))
+#Generate new column with the numeric drug concs as factor codes shifted to start at 0, for the graphing later
+X$Conc_Num_Factor <- as.numeric(as.factor(X$Conc_Num)) - 1
+
+#Get the max factor for concentration
+MAX_CONC <- max(X$Conc_Num_Factor)
+#if treating numbers not as factors uncomment next line and comment out previous line
+#MAX_CONC <- max(X$Conc_Num)
+
+#remove wells with problems for making graphs and to not include in summary statistics
+#%in% never returns NA, so a row with a missing Gene/ORF is kept as it is; the former != comparisons
+#returned NA for such a row and indexing with NA replaced it by an all-NA phantom row
+X <- X[!(X$Gene %in% c("BLANK","Blank","blank")) & !(X$ORF %in% "Blank"),]
+#X <- X[X$Gene != "HO",]
+#snapshot of X taken before the gene names are rewritten, kept for restore/debugging
+Xbu= X
+#Inserted to use SGDgenelist to update orfs and replace empty geneName cells with ORF name (adapted from Sean's Merge script). This is to 'fix' the naming for everything that follows (REMc, Heatmaps ... et al.) rather than do it piecemeal later
+#Sean's Match Script (which was adapted here) was fixed 2022_0608 so as not to write over the RF1&RF2 geneNames which caused a variance with his code results
+#in the Z_lm_L,K,r&AUC output values. Values correlated well but were off by a multiplier factor.
+#Load the SGD feature table, keeping only columns 4 and 5 of its 16 columns (every other column is read as "NULL").
+#NOTE(review): genes[,1] and genes[,2] are presumably the systematic ORF name and the standard gene name - confirm against SGD_features.tab
+SGDgeneList= "../Code/SGD_features.tab"
+genes = data.frame(read.delim(file=SGDgeneList,quote="",header=FALSE,colClasses = c(rep("NULL",3), rep("character", 2), rep("NULL", 11))))
+#Refresh column 15 of X from the SGD table by matching column 14 of X against genes[,1].
+#NOTE(review): columns 14 and 15 are addressed by position; presumably ORF and Gene - confirm against names(X)
+# - rows whose OrfRep is the reference YDL227C keep the name they already have
+# - an ORF that is absent from the SGD table gets an empty name and therefore falls back to OrfRep below;
+#   the former nomatch=1 silently copied whatever name sits on the first row of the table
+# - an empty name, or the name OCT1, is replaced by OrfRep
+#the OrfRep column index does not change inside the loop, so it is looked up once
+OrfRepColNum= as.numeric(match('OrfRep',names(X)))
+#seq_len() gives an empty loop when X has no rows (1:length() would iterate over 1 and 0)
+for(i in seq_len(nrow(X))){
+  line_num = match(X[i,14],genes[,1])
+  if(X[i,OrfRepColNum]!= "YDL227C"){
+    X[i,15] = if(is.na(line_num)) "" else genes[line_num,2]
+  }
+  if((X[i,15] == "")||(X[i,15] == "OCT1")){
+    X[i,15] = X[i,OrfRepColNum]
+  }
+}
+Xblankreplace= X
+#X= Xbu #for restore testing restore X if geneName 'Match' routine needs changing
+
+#Remove dAmPs *******************************
+#drop every row whose ORF appears in the first column of the DAmP list file
+DAmPs_List <- "../Code/22_0602_Remy_DAmPsList.txt"
+Damps <- read.delim(DAmPs_List,header=F)
+
+X <- X[!(X$ORF %in% Damps[,1]),]
+XafterDampsRM=X #Backup for debugging especially when Rstudio goes crazy out of control
+# ***********
+
+
+#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+#++++++END USER DATA SELECTION+++++++++++++++++++++++++++++++++++++++++++++++++
+#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+print("ln137 End of User Section including blank gene writeOver")
+#++++Begin Graphics Boiler Plate Section+++++++++++++++++++++++++++++++++++++++
+#theme elements for plots
+
+#theme_Publication: plot theme built on ggthemes::theme_foundation with a horizontal legend at the bottom.
+#  base_size:   base font size handed to theme_foundation
+#  base_family: base font family handed to theme_foundation
+#  returns: a ggplot2 theme object
+#Attaches grid and ggthemes when called.
+theme_Publication <- function(base_size=14, base_family="sans") {
+  library(grid)
+  library(ggthemes)
+  theme_foundation(base_size=base_size, base_family=base_family) +
+    theme(plot.title = element_text(face = "bold",
+                                    size = rel(1.2), hjust = 0.5),
+          text = element_text(),
+          panel.background = element_rect(colour = NA),
+          plot.background = element_rect(colour = NA),
+          panel.border = element_rect(colour = NA),
+          axis.title = element_text(face = "bold",size = rel(1)),
+          axis.title.y = element_text(angle=90,vjust =2),
+          axis.title.x = element_text(vjust = -0.2),
+          axis.text = element_text(),
+          axis.line = element_line(colour="black"),
+          axis.ticks = element_line(),
+          panel.grid.major = element_line(colour="#f0f0f0"),
+          panel.grid.minor = element_blank(),
+          legend.key = element_rect(colour = NA),
+          legend.position = "bottom",
+          legend.direction = "horizontal",
+          legend.key.size= unit(0.2, "cm"),
+          legend.spacing = unit(0, "cm"),
+          legend.title = element_text(face="italic"),
+          plot.margin=unit(c(10,5,5,5),"mm"),
+          strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
+          strip.text = element_text(face="bold")
+    )
+}
+
+#theme_Publication_legend_right: same theme as theme_Publication but with a vertical legend on the right
+#and 0.5 cm legend keys (the only three settings in which the two themes differ).
+#Built on theme_Publication so it no longer depends on that function having been called first to attach ggthemes.
+#  base_size, base_family: passed through to theme_Publication
+#  returns: a ggplot2 theme object
+theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
+  theme_Publication(base_size=base_size, base_family=base_family) +
+    theme(legend.position = "right",
+          legend.direction = "vertical",
+          legend.key.size= unit(0.5, "cm"))
+}
+
+#scale_fill_Publication / scale_colour_Publication: discrete fill and colour scales that share one nine-colour palette.
+#  ...: passed on to discrete_scale
+#Each is defined once and attaches scales itself; the file used to define both twice, and the later copies
+#dropped library(scales), which manual_pal needs.
+scale_fill_Publication <- function(...){
+  library(scales)
+  discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
+}
+
+scale_colour_Publication <- function(...){
+  library(scales)
+  discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
+}
+
+
+#print timestamp for initial time the code starts
+timestamp()
+#+++++BEGIN QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++
+###Part 2 - Quality control
+#print quality control graphs for each dataset before removing data due to contamination
+#and before adjusting missing data to max theoretical values
+
+#plate analysis plot
+#plate analysis is a quality check to identify plate effects containing anomalies
+
+#Layers shared by every plate scatter plot in this section: mean +/- 1 sd error bars and the mean drawn as a point.
+#fun.y/fun.ymin/fun.ymax are deprecated since ggplot2 3.3.0 (fun/fun.min/fun.max); kept so older installs still run.
+Plate_Analysis_mean_sd_layers <- list(
+  stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
+               geom = "errorbar"),
+  stat_summary(fun.y = mean, geom = "point",size=0.6)
+)
+
+#The "before quality control" plots are built here, while X still holds the unfiltered values
+Plate_Analysis_L <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
+  Plate_Analysis_mean_sd_layers +
+  ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication()
+
+Plate_Analysis_K <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
+  Plate_Analysis_mean_sd_layers +
+  ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication()
+
+Plate_Analysis_r <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
+  Plate_Analysis_mean_sd_layers +
+  ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_Publication()
+
+Plate_Analysis_AUC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
+  Plate_Analysis_mean_sd_layers +
+  ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication()
+
+
+
+Plate_Analysis_L_Box <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication()
+
+Plate_Analysis_K_Box <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication()
+
+Plate_Analysis_r_Box <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_Publication()
+
+Plate_Analysis_AUC_Box <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication()
+
+#quality control - values with a high delta background likely have heavy contamination
+#check the frequency of these values
+#report the L and K values of these spots
+#report the number to be removed based on the Delta_Background_Tolerance
+X$Delta_Backgrd <- X$LstBackgrd - X$X1stBackgrd
+
+
+#raw l vs K before QC
+#the extra ORF/Gene/Delta_Backgrd aesthetics are not drawn; presumably they are there for the ggplotly hover text
+Raw_l_vs_K_beforeQC <- ggplot(X,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) +
+  ggtitle("Raw L vs K before QC") +
+  theme_Publication_legend_right()
+pdf(paste(outputpath_QC,"Raw_L_vs_K_beforeQC.pdf",sep=""),width = 12,height = 8)
+print(Raw_l_vs_K_beforeQC)
+dev.off()
+pgg <- ggplotly(Raw_l_vs_K_beforeQC)
+#pgg
+plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_beforeQC.html",sep="")
+saveWidget(pgg, file=plotly_path, selfcontained =TRUE)
+
+
+#set delta background tolerance to DelBGFactr standard deviations above the mean delta background
+#na.rm=TRUE keeps one unreadable background value from turning the tolerance, and with it the whole filter, into NA
+Delta_Background_Tolerance <- mean(X$Delta_Backgrd,na.rm=TRUE)+(DelBGFactr*sd(X$Delta_Backgrd,na.rm=TRUE))
+#Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(3*sd(X$Delta_Backgrd))
+print(paste("Delta_Background_Tolerance is",Delta_Background_Tolerance,sep=" "))
+
+Plate_Analysis_Delta_Backgrd <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2,position="jitter") +
+  Plate_Analysis_mean_sd_layers +
+  ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication()
+
+Plate_Analysis_Delta_Backgrd_Box <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication()
+
+
+
+#row positions at or above the tolerance; which() drops NA comparisons so they can neither add phantom rows
+#nor make the assignments further down fail
+above_tolerance_rows <- which(X$Delta_Backgrd >= Delta_Background_Tolerance)
+X_Delta_Backgrd_above_Tolerance <- X[above_tolerance_rows,]
+
+#half of the median K and l of the flagged spots, used only to position the text annotation
+X_Delta_Backgrd_above_Tolerance_K_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$K,na.rm = TRUE))/2
+X_Delta_Backgrd_above_Tolerance_L_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$l,na.rm = TRUE))/2
+X_Delta_Backgrd_above_Tolerance_toRemove <- dim(X_Delta_Backgrd_above_Tolerance)[1]
+
+X_Delta_Backgrd_above_Tolerance_L_vs_K <- ggplot(X_Delta_Backgrd_above_Tolerance,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) +
+  ggtitle(paste("Raw L vs K for strains above delta background threshold of",Delta_Background_Tolerance,"or above")) +
+  annotate("text",x=X_Delta_Backgrd_above_Tolerance_L_halfmedian,y=X_Delta_Backgrd_above_Tolerance_K_halfmedian,
+           label = paste("Strains above delta background tolerance = ",X_Delta_Backgrd_above_Tolerance_toRemove)) +
+  theme_Publication_legend_right()
+pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.pdf",sep=""),width = 12,height = 8)
+print(X_Delta_Backgrd_above_Tolerance_L_vs_K)
+dev.off()
+pgg <- ggplotly(X_Delta_Backgrd_above_Tolerance_L_vs_K)
+#pgg
+plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.html",sep="")
+saveWidget(pgg, file=plotly_path, selfcontained =TRUE)
+
+#frequency plot for all data vs. the delta_background
+DeltaBackground_Frequency_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_density() +
+  ggtitle("Density plot for Delta Background by Conc All Data") + theme_Publication_legend_right()
+
+#bar plot for all data vs. the delta_background
+DeltaBackground_Bar_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_bar() +
+  ggtitle("Bar plot for Delta Background by Conc All Data") + theme_Publication_legend_right()
+
+pdf(file = paste(outputpath_QC,"Frequency_Delta_Background.pdf",sep=""),width = 12,height = 8)
+print(DeltaBackground_Frequency_Plot)
+print(DeltaBackground_Bar_Plot)
+dev.off()
+
+
+#Need to identify missing data, and differentiate between this data and removed data so the removed data can get set to NA and the missing data can get set to max theoretical values
+#1 for missing data, 0 for non missing data
+#Use "NG" for NoGrowth rather than "missing"
+X$NG <- 0
+X$NG[which(X$l == 0)] <- 1
+
+#1 for removed data, 0 non removed data
+#Use DB to identify number of genes removed due to the DeltaBackground Threshold rather than "Removed"
+#Index assignment is a no-op when nothing is flagged, so the try() wrappers are gone; they also hid the error
+#raised when the comparison contained NA, which left every contaminated spot unflagged
+X$DB <- 0
+X$DB[above_tolerance_rows] <- 1
+
+#replace the CPPs for l, r, AUC and K with NA for removed data
+X$l[above_tolerance_rows] <- NA
+X$r[above_tolerance_rows] <- NA
+X$AUC[above_tolerance_rows] <- NA
+X$K[above_tolerance_rows] <- NA
+
+
+#The "after quality control" plots are built from X now that the flagged spots are NA
+Plate_Analysis_L_afterQC <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
+  Plate_Analysis_mean_sd_layers +
+  ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication()
+
+Plate_Analysis_K_afterQC <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
+  Plate_Analysis_mean_sd_layers +
+  ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication()
+
+Plate_Analysis_r_afterQC <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
+  Plate_Analysis_mean_sd_layers +
+  ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication()
+
+Plate_Analysis_AUC_afterQC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
+  Plate_Analysis_mean_sd_layers +
+  ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication()
+
+Plate_Analysis_Delta_Backgrd_afterQC <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
+  Plate_Analysis_mean_sd_layers +
+  ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication()
+
+
+Plate_Analysis_L_Box_afterQC <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication()
+
+Plate_Analysis_K_Box_afterQC <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication()
+
+Plate_Analysis_r_Box_afterQC <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication()
+
+Plate_Analysis_AUC_Box_afterQC <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication()
+
+Plate_Analysis_Delta_Backgrd_Box_afterQC <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication()
+
+#print the plate analysis data before and after QC, one page per plot, before/after pairs side by side in page order
+#print() is explicit so the pages are also drawn when the script is run through source()
+pdf(file=paste(outputpath_QC,"Plate_Analysis.pdf",sep=""),width = 14,height=9)
+print(Plate_Analysis_L)
+print(Plate_Analysis_L_afterQC)
+print(Plate_Analysis_K)
+print(Plate_Analysis_K_afterQC)
+print(Plate_Analysis_r)
+print(Plate_Analysis_r_afterQC)
+print(Plate_Analysis_AUC)
+print(Plate_Analysis_AUC_afterQC)
+print(Plate_Analysis_Delta_Backgrd)
+print(Plate_Analysis_Delta_Backgrd_afterQC)
+dev.off()
+
+#print the plate analysis boxplots before and after QC
+pdf(file=paste(outputpath_QC,"Plate_Analysis_Boxplots.pdf",sep=""),width = 18,height=9)
+print(Plate_Analysis_L_Box)
+print(Plate_Analysis_L_Box_afterQC)
+print(Plate_Analysis_K_Box)
+print(Plate_Analysis_K_Box_afterQC)
+print(Plate_Analysis_r_Box)
+Plate_Analysis_r_Box_afterQC
+Plate_Analysis_AUC_Box
+Plate_Analysis_AUC_Box_afterQC
+Plate_Analysis_Delta_Backgrd_Box
+Plate_Analysis_Delta_Backgrd_Box_afterQC
+dev.off()
+
+#Redo the after-QC plate analysis on the spots that grew: keep only rows with a non-missing l above zero
+X_noZero <- X[!is.na(X$l) & X$l > 0,]
+
+#Layers every scatter plot below has in common, in drawing order:
+#the raw points, mean +/- 1 sd error bars, and the mean as a point
+noZero_scatter_layers <- list(
+  geom_point(shape=3,size=0.2),
+  stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
+               geom = "errorbar"),
+  stat_summary(fun.y = mean, geom = "point",size=0.6)
+)
+
+#scatter plots by Scan, coloured by drug concentration
+Plate_Analysis_L_afterQC_Z <- ggplot(X_noZero,aes(Scan,l,color=as.factor(Conc_Num))) +
+  noZero_scatter_layers +
+  ggtitle("Plate analysis by Drug Conc for L after quality control") +
+  theme_Publication()
+
+Plate_Analysis_K_afterQC_Z <- ggplot(X_noZero,aes(Scan,K,color=as.factor(Conc_Num))) +
+  noZero_scatter_layers +
+  ggtitle("Plate analysis by Drug Conc for K after quality control") +
+  theme_Publication()
+
+Plate_Analysis_r_afterQC_Z <- ggplot(X_noZero,aes(Scan,r,color=as.factor(Conc_Num))) +
+  noZero_scatter_layers +
+  ggtitle("Plate analysis by Drug Conc for r after quality control") +
+  theme_Publication()
+
+Plate_Analysis_AUC_afterQC_Z <- ggplot(X_noZero,aes(Scan,AUC,color=as.factor(Conc_Num))) +
+  noZero_scatter_layers +
+  ggtitle("Plate analysis by Drug Conc for AUC after quality control") +
+  theme_Publication()
+
+Plate_Analysis_Delta_Backgrd_afterQC_Z <- ggplot(X_noZero,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) +
+  noZero_scatter_layers +
+  ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") +
+  theme_Publication()
+
+#the plots keep their own reference to the layers, so the helper does not need to stay in the workspace
+rm(noZero_scatter_layers)
+
+#boxplots per Scan, coloured by drug concentration
+Plate_Analysis_L_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) +
+  geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for L after quality control") +
+  theme_Publication()
+
+Plate_Analysis_K_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) +
+  geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for K after quality control") +
+  theme_Publication()
+
+Plate_Analysis_r_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) +
+  geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for r after quality control") +
+  theme_Publication()
+
+Plate_Analysis_AUC_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) +
+  geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for AUC after quality control") +
+  theme_Publication()
+
+Plate_Analysis_Delta_Backgrd_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) +
+  geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") +
+  theme_Publication()
+
+#write the zero-removed scatter plots, one per page
+pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros.pdf",sep=""),width = 14,height=9)
+Plate_Analysis_L_afterQC_Z
+Plate_Analysis_K_afterQC_Z
+Plate_Analysis_r_afterQC_Z
+Plate_Analysis_AUC_afterQC_Z
+Plate_Analysis_Delta_Backgrd_afterQC_Z
+dev.off()
+
+#write the zero-removed boxplots, one per page
+pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros_Boxplots.pdf",sep=""),width = 18,height=9)
+Plate_Analysis_L_Box_afterQC_Z
+Plate_Analysis_K_Box_afterQC_Z
+Plate_Analysis_r_Box_afterQC_Z
+Plate_Analysis_AUC_Box_afterQC_Z
+Plate_Analysis_Delta_Backgrd_Box_afterQC_Z +dev.off() + +#remove dataset with zeros removed +rm(X_noZero) + + +#X_test_missing_and_removed <- X[X$Removed == 1,] + +#calculate summary statistics for all strains, including both background and the deletions +X_stats_ALL <- ddply(X, c("Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean_L = mean(l,na.rm=TRUE), + median_L = median(l,na.rm=TRUE), + max_L = max(l,na.rm=TRUE), + min_L = min(l,na.rm=TRUE), + sd_L = sd(l,na.rm=TRUE), + se_L = sd_L / sqrt(N-1), + mean_K = mean(K,na.rm=TRUE), + median_K = median(K,na.rm=TRUE), + max_K = max(K,na.rm=TRUE), + min_K = min(K,na.rm=TRUE), + sd_K = sd(K,na.rm=TRUE), + se_K = sd_K / sqrt(N-1), + mean_r = mean(r,na.rm=TRUE), + median_r = median(r,na.rm=TRUE), + max_r = max(r,na.rm=TRUE), + min_r = min(r,na.rm=TRUE), + sd_r = sd(r,na.rm=TRUE), + se_r = sd_r / sqrt(N-1), + mean_AUC = mean(AUC,na.rm=TRUE), + median_AUC = median(AUC,na.rm=TRUE), + max_AUC = max(AUC,na.rm=TRUE), + min_AUC = min(AUC,na.rm=TRUE), + sd_AUC = sd(AUC,na.rm=TRUE), + se_AUC = sd_AUC / sqrt(N-1) +) +#print(X_stats_ALL_L) + +write.csv(X_stats_ALL,file=paste(outputpath,"SummaryStats_ALLSTRAINS.csv"),row.names = FALSE) +#+++++END QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +##### Part 3 - Generate summary statistics and calculate the max theoretical L value +##### Calculate the Z score at each drug conc for each deletion strain + + +#get the background strains - can be modified to take another argument but for most screens will just be YDL227C +Background_Strains <- c("YDL227C") + +#first part of loop will go through for each background strain +#most cases there will only be one YDL227C +for(s in Background_Strains){ + X_Background <- X[X$OrfRep == s,] + + #if there's missing data for the background strains set these values to NA so the 0 values aren't included in summary statistics + #we may want to consider in some cases giving the max high value to L depending on the data 
type + if(table(X_Background$l)[1] == 0){ + X_Background[X_Background$l == 0,]$l <- NA + X_Background[X_Background$K == 0,]$K <- NA + X_Background[X_Background$r == 0,]$r <- NA + X_Background[X_Background$AUC == 0,]$AUC <- NA + } + + X_Background <- X_Background[!is.na(X_Background$l),] + + #get summary stats for L, K, R, AUC + X_stats_BY_L <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean = mean(l,na.rm=TRUE), + median = median(l,na.rm=TRUE), + max = max(l,na.rm=TRUE), + min = min(l,na.rm=TRUE), + sd = sd(l,na.rm=TRUE), + se = sd / sqrt(N-1) + ) + print(X_stats_BY_L) + X1_SD <- max(X_stats_BY_L$sd) + + X_stats_BY_K <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, + N = (length(K)), + mean = mean(K,na.rm=TRUE), + median = median(K,na.rm=TRUE), + max = max(K,na.rm=TRUE), + min = min(K,na.rm=TRUE), + sd = sd(K,na.rm=TRUE), + se = sd / sqrt(N-1) + ) + + X1_SD_K <- max(X_stats_BY_K$sd) + + + X_stats_BY_r <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, + N = length(r), + mean = mean(r,na.rm=TRUE), + median = median(r,na.rm=TRUE), + max = max(r,na.rm=TRUE), + min = min(r,na.rm=TRUE), + sd = sd(r,na.rm=TRUE), + se = sd / sqrt(N-1) + ) + + X1_SD_r <- max(X_stats_BY_r$sd) + + X_stats_BY_AUC <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, + N = length(AUC), + mean = mean(AUC,na.rm=TRUE), + median = median(AUC,na.rm=TRUE), + max = max(AUC,na.rm=TRUE), + min = min(AUC,na.rm=TRUE), + sd = sd(AUC,na.rm=TRUE), + se = sd / sqrt(N-1) + ) + + X1_SD_AUC <- max(X_stats_BY_AUC$sd) + + X_stats_BY <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean_L = mean(l,na.rm=TRUE), + median_L = median(l,na.rm=TRUE), + max_L = max(l,na.rm=TRUE), + min_L = min(l,na.rm=TRUE), + sd_L = sd(l,na.rm=TRUE), + se_L = sd_L / sqrt(N-1), + mean_K = mean(K,na.rm=TRUE), + median_K = median(K,na.rm=TRUE), + max_K = 
max(K,na.rm=TRUE), + min_K = min(K,na.rm=TRUE), + sd_K = sd(K,na.rm=TRUE), + se_K = sd_K / sqrt(N-1), + mean_r = mean(r,na.rm=TRUE), + median_r = median(r,na.rm=TRUE), + max_r = max(r,na.rm=TRUE), + min_r = min(r,na.rm=TRUE), + sd_r = sd(r,na.rm=TRUE), + se_r = sd_r / sqrt(N-1), + mean_AUC = mean(AUC,na.rm=TRUE), + median_AUC = median(AUC,na.rm=TRUE), + max_AUC = max(AUC,na.rm=TRUE), + min_AUC = min(AUC,na.rm=TRUE), #AUC minimum needs its own column name; reusing min_L would collide with the L minimum above + sd_AUC = sd(AUC,na.rm=TRUE), + se_AUC = sd_AUC / sqrt(N-1) + ) + + write.csv(X_stats_BY,file=paste(outputpath,"SummaryStats_BackgroundStrains.csv"),row.names=FALSE) + + #calculate the max theoretical L values + #only look for max values when K is within 2SD of the ref strain + for(q in unique(X$Conc_Num_Factor)){ + if(q == 0){ + X_within_2SD_K <- X[X$Conc_Num_Factor == q,] + X_within_2SD_K <- X_within_2SD_K[!is.na(X_within_2SD_K$l),] + X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] + X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) ,] + X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] + + X_outside_2SD_K <- X[X$Conc_Num_Factor == q,] + X_outside_2SD_K <- X_outside_2SD_K[!is.na(X_outside_2SD_K$l),] + #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] + X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] + #X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] + + } + if(q > 0){ + X_within_2SD_K_temp <- X[X$Conc_Num_Factor == q,] + X_within_2SD_K_temp <- X_within_2SD_K_temp[!is.na(X_within_2SD_K_temp$l),] + X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] + X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])),] +
X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])),] + X_within_2SD_K <- rbind(X_within_2SD_K,X_within_2SD_K_temp) + + X_outside_2SD_K_temp <- X[X$Conc_Num_Factor == q,] + X_outside_2SD_K_temp <- X_outside_2SD_K_temp[!is.na(X_outside_2SD_K_temp$l),] + #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] + X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] + #X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] + X_outside_2SD_K <- rbind(X_outside_2SD_K,X_outside_2SD_K_temp) + } + } + + X_stats_BY_L_within_2SD_K <- ddply(X_within_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean = mean(l), + median = median(l), + max = max(l,na.rm=TRUE), + min = min(l,na.rm=TRUE), + sd = sd(l), + se = sd / sqrt(N-1), + z_max = (max-mean)/sd + ) + print(X_stats_BY_L_within_2SD_K) + X1_SD_within_2SD_K <- max(X_stats_BY_L_within_2SD_K$sd) + write.csv(X_stats_BY_L_within_2SD_K,file=paste(outputpath_QC,"Max_Observed_L_Vals_for_spots_within_2SD_K.csv",sep=""),row.names=FALSE) + + X_stats_BY_L_outside_2SD_K <- ddply(X_outside_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean = mean(l), + median = median(l), + max = max(l,na.rm=TRUE), + min = min(l,na.rm=TRUE), + sd = sd(l), + se = sd / sqrt(N-1) + ) + print(X_stats_BY_L_outside_2SD_K) + X1_SD_outside_2SD_K <- max(X_stats_BY_L_outside_2SD_K$sd) + + #X1_SD_outside_2SD_K <- X[X$l %in% X1_SD_within_2SD_K$l,] + Outside_2SD_K_L_vs_K <- ggplot(X_outside_2SD_K,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + + ggtitle("Raw L vs K for strains falling outside 2SD of the K mean at each conc") + 
theme_Publication_legend_right() + pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8) + print(Outside_2SD_K_L_vs_K) + dev.off() + pgg <- ggplotly(Outside_2SD_K_L_vs_K) + plotly_path <- paste(getwd(),"/",outputpath_QC,"RawL_vs_K_for_strains_outside_2SD_K.html",sep="") + saveWidget(pgg, file=plotly_path, selfcontained =TRUE) + + + Outside_2SD_K_delta_background_vs_K <- ggplot(X_outside_2SD_K,aes(Delta_Backgrd,K,color=as.factor(Conc_Num))) + geom_point(aes(l=l,ORF=ORF,Gene=Gene),shape=3,position="jitter") + + ggtitle("DeltaBackground vs K for strains falling outside 2SD of the K mean at each conc") + theme_Publication_legend_right() + pdf(paste(outputpath_QC,"DeltaBackground_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8) + print(Outside_2SD_K_delta_background_vs_K) #explicit print(): inside a for loop a bare ggplot object is not drawn to the open pdf device + dev.off() + pgg <- ggplotly(Outside_2SD_K_delta_background_vs_K) + #pgg + plotly_path <- paste(getwd(),"/",outputpath_QC,"DeltaBackground_vs_K_for_strains_outside_2SD_K.html",sep="") + saveWidget(pgg, file=plotly_path, selfcontained =TRUE) + + + #get the background strain mean values at the no drug conc to calculate shift + Background_L <- X_stats_BY_L$mean[1] + Background_K <- X_stats_BY_K$mean[1] + Background_r <- X_stats_BY_r$mean[1] + Background_AUC <- X_stats_BY_AUC$mean[1] + + #create empty plots for plotting element + p_l <- ggplot() + p_K <- ggplot() + p_r <- ggplot() + p_AUC <- ggplot() + + p_rf_l <- ggplot() + p_rf_K <- ggplot() + p_rf_r <- ggplot() + p_rf_AUC <- ggplot() + + #get only the deletion strains + X2 <- X + X2 <- X2[X2$OrfRep != "YDL227C",] + + #if set to max theoretical value, add a 1 to SM, if not, leave as 0 + #SM = Set to Max + X2$SM <- 0 + #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min + for(i in 1:length(unique(X2$Conc_Num))){ + Concentration <- unique(X2$Conc_Num)[i] + X2_temp <- X2[X2$Conc_Num == Concentration,] + if(Concentration ==
0){ + X2_new <- X2_temp + print(paste("Check loop order, conc =",Concentration,sep=" ")) + } + if(Concentration > 0){ + try(X2_temp[X2_temp$l == 0 & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) + try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$SM <- 1) + try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) + + #X2_temp[X2_temp$K == 0,]$K <- X_stats_ALL_K$max[i] + #X2_temp[X2_temp$r == 0,]$r <- X_stats_ALL_r$max[i] + #X2_temp[X2_temp$AUC == 0,]$AUC <- X_stats_ALL_AUC$max[i] + print(paste("Check loop order, conc =",Concentration,sep=" ")) + + X2_new <- rbind(X2_new,X2_temp) + + } + } + X2 <- X2_new + + + #get only the RF strains + X2_RF <- X + X2_RF <- X2_RF[X2_RF$OrfRep == "YDL227C",] + #if set to max theoretical value, add a 1 to SM, if not, leave as 0 + #SM = Set to Max + X2_RF$SM <- 0 + #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min + for(i in 1:length(unique(X2_RF$Conc_Num))){ + Concentration <- unique(X2_RF$Conc_Num)[i] + X2_RF_temp <- X2_RF[X2_RF$Conc_Num == Concentration,] + if(Concentration == 0){ + X2_RF_new <- X2_RF_temp + print(paste("Check loop order, conc =",Concentration,sep=" ")) + } + if(Concentration > 0){ + try(X2_RF_temp[X2_RF_temp$l == 0 & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) + try(X2_RF_temp[X2_RF_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_RF_temp$l),]$SM <- 1) #flag the RF rows being rbind-ed below (X2_RF_temp), not the deletion-strain subset X2_temp + try(X2_RF_temp[X2_RF_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) + print(paste("Check loop order, if error, refs have no L values outside theoretical max L, for REFs, conc =",Concentration,sep=" ")) + + X2_RF_new <- rbind(X2_RF_new,X2_RF_temp) + + } + } + X2_RF <- X2_RF_new + + + #######Part 4 Get the RF Z score values + #Change the OrfRep Column to include the RF strain, the Gene name and the Num.
so each RF gets its own score + X2_RF$OrfRep <- paste(X2_RF$OrfRep,X2_RF$Gene,X2_RF$Num.,sep="_") + + num_genes_RF <- length(unique(X2_RF$OrfRep)) + print(num_genes_RF) + + + #create the output data.frame containing columns for each RF strain + InteractionScores_RF <- unique(X2_RF["OrfRep"]) + #InteractionScores_RF$Gene <- unique(X2$Gene) + InteractionScores_RF$Gene <- NA + InteractionScores_RF$Raw_Shift_L <- NA + InteractionScores_RF$Z_Shift_L <- NA + InteractionScores_RF$lm_Score_L <- NA + InteractionScores_RF$Z_lm_L <- NA + InteractionScores_RF$R_Squared_L <- NA + InteractionScores_RF$Sum_Z_Score_L <- NA + InteractionScores_RF$Avg_Zscore_L <- NA + InteractionScores_RF$Raw_Shift_K <- NA + InteractionScores_RF$Z_Shift_K <- NA + InteractionScores_RF$lm_Score_K <- NA + InteractionScores_RF$Z_lm_K <- NA + InteractionScores_RF$R_Squared_K <- NA + InteractionScores_RF$Sum_Z_Score_K <- NA + InteractionScores_RF$Avg_Zscore_K <- NA + InteractionScores_RF$Raw_Shift_r <- NA + InteractionScores_RF$Z_Shift_r <- NA + InteractionScores_RF$lm_Score_r <- NA + InteractionScores_RF$Z_lm_r <- NA + InteractionScores_RF$R_Squared_r <- NA + InteractionScores_RF$Sum_Z_Score_r <- NA + InteractionScores_RF$Avg_Zscore_r <- NA + InteractionScores_RF$Raw_Shift_AUC <- NA + InteractionScores_RF$Z_Shift_AUC <- NA + InteractionScores_RF$lm_Score_AUC <- NA + InteractionScores_RF$Z_lm_AUC <- NA + InteractionScores_RF$R_Squared_AUC <- NA + InteractionScores_RF$Sum_Z_Score_AUC <- NA + InteractionScores_RF$Avg_Zscore_AUC <- NA + InteractionScores_RF$NG <- NA + InteractionScores_RF$SM <- NA + + + for(i in 1:num_genes_RF){ + #get each deletion strain ORF + Gene_Sel <- unique(X2_RF$OrfRep)[i] + #extract only the current deletion strain and its data + X_Gene_Sel <- X2_RF[X2_RF$OrfRep == Gene_Sel,] + + X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean_L = mean(l,na.rm = TRUE), + median_L = median(l,na.rm = TRUE), + sd_L = 
sd(l,na.rm = TRUE), + se_L = sd_L / sqrt(N-1), + mean_K = mean(K,na.rm = TRUE), + median_K = median(K,na.rm = TRUE), + sd_K = sd(K,na.rm = TRUE), + se_K = sd_K / sqrt(N-1), + mean_r = mean(r,na.rm = TRUE), + median_r = median(r,na.rm = TRUE), + sd_r = sd(r,na.rm = TRUE), + se_r = sd_r / sqrt(N-1), + mean_AUC = mean(AUC,na.rm = TRUE), + median_AUC = median(AUC,na.rm = TRUE), + sd_AUC = sd(AUC,na.rm = TRUE), + se_AUC = sd_AUC / sqrt(N-1), + NG = sum(NG,na.rm=TRUE), + DB= sum(DB,na.rm=TRUE), + SM = sum(SM,na.rm=TRUE) + ) + + + #Get shift vals + #if L is 0, that means the no growth on no drug + #if L is NA at 0, that means the spot was removed due to contamination + #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift + #otherwise calculate shift at no drug conc + if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){ + X_stats_interaction$Raw_Shift_L <- 0 + X_stats_interaction$Raw_Shift_K <- 0 + X_stats_interaction$Raw_Shift_r <- 0 + X_stats_interaction$Raw_Shift_AUC <- 0 + X_stats_interaction$Z_Shift_L <- 0 + X_stats_interaction$Z_Shift_K <- 0 + X_stats_interaction$Z_Shift_r <- 0 + X_stats_interaction$Z_Shift_AUC <- 0 + }else{ + X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L + X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K + X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r + X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC + X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1] + X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1] + X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1] + X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1] + } + + + #get WT vals + X_stats_interaction$WT_l <- X_stats_BY_L$mean + X_stats_interaction$WT_K 
<- X_stats_BY_K$mean + X_stats_interaction$WT_r <- X_stats_BY_r$mean + X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean + + #Get WT SD + X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd + X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd + X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd + X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd + + + #only get scores if there's growth at no drug + if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){ + + #calculate expected values + X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L + X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K + X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r + X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC + + #calculate normalized delta values + X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L + X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K + X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r + X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC + + #disregard shift for no growth values in Z score calculation + if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- 
X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC + + } + #disregard shift for set to max values in Z score calculation + if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l + #only calculate the L value without shift since L is the only adjusted value + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC + + } + + + + #calculate Z score at each concentration + X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) + X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) + X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) + X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) + + #get linear model + gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction) + gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction) + gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction) + gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction) + + #get interaction score calculated by linear model and R-squared value for the fit + gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] 
+ r_squared_l <- summary(gene_lm_L)$r.squared + gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] + r_squared_K <- summary(gene_lm_K)$r.squared + gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] + r_squared_r <- summary(gene_lm_r)$r.squared + gene_interaction_AUC <- MAX_CONC*(gene_lm_AUC$coefficients[2]) + gene_lm_AUC$coefficients[1] #slope and intercept must both come from the AUC fit + r_squared_AUC <- summary(gene_lm_AUC)$r.squared + + #Get total of non removed values + Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1 + + #report the scores + InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] + InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] + + InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] + InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] + InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L + InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l + InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE) + InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc) + + + InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] + InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] + InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K + InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K + InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <-
sum(X_stats_interaction$Zscore_K,na.rm = TRUE) + InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc) + + InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] + InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] + InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r + InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r + InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE) + InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1) + + InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] + InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] + InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC + InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC + InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE) + InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1) + } + + if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){ + + #calculate expected values + X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L + X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K + X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r + 
X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC + + #calculate normalized delta values + X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L + X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K + X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r + X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC + + #disregard shift for missing values in Z score calculatiom + if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){ + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC + } + #disregard shift for set to max values in Z score calculation + if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l + #only calculate the L value without shift since L is the only adjusted value + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - 
X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC + + } + + + #calculate Z score at each concentration + X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) + X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) + X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) + X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) + + #NA values for the next part since there's an NA or 0 at the no drug. + gene_lm_L <- NA + gene_lm_K <- NA + gene_lm_r <- NA + gene_lm_AUC <- NA + + gene_interaction_L <- NA + r_squared_l <- NA + gene_interaction_K <- NA + r_squared_K <- NA + gene_interaction_r <- NA + r_squared_r <- NA + gene_interaction_AUC <- NA + r_squared_AUC <- NA + + X_stats_interaction$Raw_Shift_L <- NA + X_stats_interaction$Raw_Shift_K <- NA + X_stats_interaction$Raw_Shift_r <- NA + X_stats_interaction$Raw_Shift_AUC <- NA + + X_stats_interaction$Z_Shift_L <- NA + X_stats_interaction$Z_Shift_K <- NA + X_stats_interaction$Z_Shift_r <- NA + X_stats_interaction$Z_Shift_AUC <- NA + + InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] + InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] + + InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] + InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] + InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L + InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l + 
InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + + InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] + InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] + InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K + InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K + InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + + InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] + InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] + InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r + InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r + InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + + InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] + InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] + InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC + InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC + InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + } + + if(i == 1){ + X_stats_interaction_ALL_RF <- X_stats_interaction 
+ } + if(i > 1){ + X_stats_interaction_ALL_RF <- rbind(X_stats_interaction_ALL_RF,X_stats_interaction) + } + + InteractionScores_RF$NG[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE) + InteractionScores_RF$DB[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE) + InteractionScores_RF$SM[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE) + + #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int) + + + } + print("Pass RF Calculation loop") + + lm_sd_L <- sd(InteractionScores_RF$lm_Score_L,na.rm=TRUE) + lm_sd_K <- sd(InteractionScores_RF$lm_Score_K,na.rm=TRUE) + lm_sd_r <- sd(InteractionScores_RF$lm_Score_r,na.rm=TRUE) + lm_sd_AUC <- sd(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) + + lm_mean_L <- mean(InteractionScores_RF$lm_Score_L,na.rm=TRUE) + lm_mean_K <- mean(InteractionScores_RF$lm_Score_K,na.rm=TRUE) + lm_mean_r <- mean(InteractionScores_RF$lm_Score_r,na.rm=TRUE) + lm_mean_AUC <- mean(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) + + print(paste("Mean RF linear regression score L",lm_mean_L)) + + + InteractionScores_RF$Z_lm_L <- (InteractionScores_RF$lm_Score_L - lm_mean_L)/(lm_sd_L) + InteractionScores_RF$Z_lm_K <- (InteractionScores_RF$lm_Score_K - lm_mean_K)/(lm_sd_K) + InteractionScores_RF$Z_lm_r <- (InteractionScores_RF$lm_Score_r - lm_mean_r)/(lm_sd_r) + InteractionScores_RF$Z_lm_AUC <- (InteractionScores_RF$lm_Score_AUC - lm_mean_AUC)/(lm_sd_AUC) + + + InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$Z_lm_L,decreasing=TRUE),] + InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$NG,decreasing=TRUE),] + write.csv(InteractionScores_RF,paste(outputpath,"RF_ZScores_Interaction.csv",sep=""),row.names=FALSE) + + + for(i in 1:num_genes_RF){ + Gene_Sel <- unique(InteractionScores_RF$OrfRep)[i] + X_ZCalculations <- X_stats_interaction_ALL_RF[X_stats_interaction_ALL_RF$OrfRep == Gene_Sel,] + X_Int_Scores <- 
InteractionScores_RF[InteractionScores_RF$OrfRep == Gene_Sel,] + + p_rf_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-65,65)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=35,label = paste("Avg Zscore =",round(X_Int_Scores$Avg_Zscore_L,2))) + + annotate("text",x=1,y=25,label = paste("lm Zscore =",round(X_Int_Scores$Z_lm_L,2))) + + annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + + theme_Publication() + + p_rf_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-65,65)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) + + annotate("text",x=1,y=25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_K,2))) + #K panel reports the K-based lm Z score (Z_lm_K) + annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-45,label = paste("SM
=",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + + theme_Publication() + + p_rf_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-0.65,0.65)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) + + annotate("text",x=1,y=0.25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_r,2))) + + annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) + + theme_Publication() + + p_rf_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-6500,6500)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) + + annotate("text",x=1,y=2500,label = paste("lm 
ZScore =",round(X_Int_Scores$Z_lm_AUC,2))) + + annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) + + theme_Publication() + + + + if(i == 1){ + X_stats_interaction_ALL_RF_final <- X_ZCalculations + } + if(i > 1){ + X_stats_interaction_ALL_RF_final <- rbind(X_stats_interaction_ALL_RF_final,X_ZCalculations) + } + } + print("Pass RF ggplot loop") + write.csv(X_stats_interaction_ALL_RF_final,paste(outputpath,"RF_ZScore_Calculations.csv",sep=""),row.names = FALSE) + + + ####### Part 5 - Get Zscores for Gene deletion strains + + + + #get total number of genes for the next loop + num_genes <- length(unique(X2$OrfRep)) + print(num_genes) + + #create the output data.frame containing columns for each deletion strain + InteractionScores <- unique(X2["OrfRep"]) + #InteractionScores$Gene <- unique(X2$Gene) + InteractionScores$Gene <- NA + InteractionScores$Raw_Shift_L <- NA + InteractionScores$Z_Shift_L <- NA + InteractionScores$lm_Score_L <- NA + InteractionScores$Z_lm_L <- NA + InteractionScores$R_Squared_L <- NA + InteractionScores$Sum_Z_Score_L <- NA + InteractionScores$Avg_Zscore_L <- NA + InteractionScores$Raw_Shift_K <- NA + InteractionScores$Z_Shift_K <- NA + InteractionScores$lm_Score_K <- NA + InteractionScores$Z_lm_K <- NA + InteractionScores$R_Squared_K <- NA + InteractionScores$Sum_Z_Score_K <- NA + InteractionScores$Avg_Zscore_K <- NA + InteractionScores$Raw_Shift_r <- NA + InteractionScores$Z_Shift_r <- NA + InteractionScores$lm_Score_r <- NA + InteractionScores$Z_lm_r <- NA + InteractionScores$R_Squared_r <- NA + InteractionScores$Sum_Z_Score_r <- NA + 
InteractionScores$Avg_Zscore_r <- NA + InteractionScores$Raw_Shift_AUC <- NA + InteractionScores$Z_Shift_AUC <- NA + InteractionScores$lm_Score_AUC <- NA + InteractionScores$Z_lm_AUC <- NA + InteractionScores$R_Squared_AUC <- NA + InteractionScores$Sum_Z_Score_AUC <- NA + InteractionScores$Avg_Zscore_AUC <- NA + InteractionScores$NG <- NA + InteractionScores$DB <- NA + InteractionScores$SM <- NA + + for(i in 1:num_genes){ + #get each deletion strain ORF + Gene_Sel <- unique(X2$OrfRep)[i] + #extract only the current deletion strain and its data + X_Gene_Sel <- X2[X2$OrfRep == Gene_Sel,] + + X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean_L = mean(l,na.rm = TRUE), + median_L = median(l,na.rm = TRUE), + sd_L = sd(l,na.rm = TRUE), + se_L = sd_L / sqrt(N-1), + mean_K = mean(K,na.rm = TRUE), + median_K = median(K,na.rm = TRUE), + sd_K = sd(K,na.rm = TRUE), + se_K = sd_K / sqrt(N-1), + mean_r = mean(r,na.rm = TRUE), + median_r = median(r,na.rm = TRUE), + sd_r = sd(r,na.rm = TRUE), + se_r = sd_r / sqrt(N-1), + mean_AUC = mean(AUC,na.rm = TRUE), + median_AUC = median(AUC,na.rm = TRUE), + sd_AUC = sd(AUC,na.rm = TRUE), + se_AUC = sd_AUC / sqrt(N-1), + NG = sum(NG,na.rm=TRUE), + DB= sum(DB,na.rm=TRUE), + SM = sum(SM,na.rm=TRUE) + ) + + + #Get shift vals + #if L is 0, that means the no growth on no drug + #if L is NA at 0, that means the spot was removed due to contamination + #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift + #otherwise calculate shift at no drug conc + if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){ + X_stats_interaction$Raw_Shift_L <- 0 + X_stats_interaction$Raw_Shift_K <- 0 + X_stats_interaction$Raw_Shift_r <- 0 + X_stats_interaction$Raw_Shift_AUC <- 0 + X_stats_interaction$Z_Shift_L <- 0 + X_stats_interaction$Z_Shift_K <- 0 + X_stats_interaction$Z_Shift_r <- 0 + X_stats_interaction$Z_Shift_AUC <- 0 + 
}else{ + X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L + X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K + X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r + X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC + X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1] + X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1] + X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1] + X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1] + } + + + #get WT vals + X_stats_interaction$WT_l <- X_stats_BY_L$mean + X_stats_interaction$WT_K <- X_stats_BY_K$mean + X_stats_interaction$WT_r <- X_stats_BY_r$mean + X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean + + #Get WT SD + X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd + X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd + X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd + X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd + + + #only get scores if there's growth at no drug + if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){ + + #calculate expected values + X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L + X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K + X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r + X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC + + #calculate normalized delta values + X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L + X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K + X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r + 
X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC + + #disregard shift for no growth values in Z score calculation + if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC + + } + #disregard shift for set to max values in Z score calculation + if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l + #only calculate the L value without shift since L is the only adjusted value + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC + + } + + + + #calculate Z score at each concentration + X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) + 
X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) + X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) + X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) + + #fit one linear model per growth parameter: Delta regressed on Conc_Num_Factor + gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction) + gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction) + gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction) + gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction) + + #interaction score is the fitted Delta at MAX_CONC (slope*MAX_CONC plus intercept); also keep R-squared of each fit + #coefficients[1] is the intercept and coefficients[2] the Conc_Num_Factor slope of the SAME model + gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] + r_squared_l <- summary(gene_lm_L)$r.squared + gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] + r_squared_K <- summary(gene_lm_K)$r.squared + gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] + r_squared_r <- summary(gene_lm_r)$r.squared + #AUC score uses the AUC model's slope (previously took the slope from gene_lm_r) + gene_interaction_AUC <- MAX_CONC*(gene_lm_AUC$coefficients[2]) + gene_lm_AUC$coefficients[1] + r_squared_AUC <- summary(gene_lm_AUC)$r.squared + + #Get total of non removed values: Total_Conc_Nums minus the DB count minus 1 + Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1 + + #report the scores into the InteractionScores row for this ORF + InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1]) + InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1]) + + InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] + InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] + InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_L + InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <-
(gene_interaction_L-lm_mean_L)/lm_sd_L + InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l + InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE) + InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc) + + + InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] + InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] + InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_K + InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_K-lm_mean_K)/lm_sd_K + InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K + InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE) + InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc) + + InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] + InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] + InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_r + InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_r-lm_mean_r)/lm_sd_r + InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r + InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE) + InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1) + + InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- 
X_stats_interaction$Raw_Shift_AUC[1] + InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] + InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_AUC + InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_AUC-lm_mean_AUC)/lm_sd_AUC + InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC + InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE) + InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1) + } + + if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){ + + #calculate expected values + X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L + X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K + X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r + X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC + + #calculate normalized delta values + X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L + X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K + X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r + X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC + + #disregard shift for missing values in Z score calculatiom + if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){ + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 
1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC + } + #disregard shift for set to max values in Z score calculation + if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l + #only calculate the L value without shift since L is the only adjusted value + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC + + } + + + #calculate Z score at each concentration + X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) + X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) + X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) + X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) + + #NA values for the next part since there's an NA or 0 at the no drug. 
+ #the AUC placeholders are reset as well, so R_Squared_AUC below never reuses the previous gene's fit (or fails when the first gene has no growth) + gene_lm_L <- NA + gene_lm_K <- NA + gene_lm_r <- NA + gene_lm_AUC <- NA + + gene_interaction_L <- NA + r_squared_l <- NA + gene_interaction_K <- NA + r_squared_K <- NA + gene_interaction_r <- NA + r_squared_r <- NA + gene_interaction_AUC <- NA + r_squared_AUC <- NA + + #shifts are reported as NA for this strain (they were set to 0 earlier only to compute the deltas) + X_stats_interaction$Raw_Shift_L <- NA + X_stats_interaction$Raw_Shift_K <- NA + X_stats_interaction$Raw_Shift_r <- NA + X_stats_interaction$Raw_Shift_AUC <- NA + + X_stats_interaction$Z_Shift_L <- NA + X_stats_interaction$Z_Shift_K <- NA + X_stats_interaction$Z_Shift_r <- NA + X_stats_interaction$Z_Shift_AUC <- NA + + #write the NA scores into the InteractionScores row for this ORF + InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1]) + InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1]) + + InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] + InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] + InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l + InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- NA + + InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] + InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] + InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <-NA + InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K + InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- NA + + InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <-
X_stats_interaction$Raw_Shift_r[1] + InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] + InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r + InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- NA + + InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] + InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] + InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC + InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA + } + + if(i == 1){ + X_stats_interaction_ALL <- X_stats_interaction + } + if(i > 1){ + X_stats_interaction_ALL <- rbind(X_stats_interaction_ALL,X_stats_interaction) + } + + InteractionScores$NG[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE) + InteractionScores$DB[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE) + InteractionScores$SM[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE) + + #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int) + + + } + print("Pass Int Calculation loop") + InteractionScores <- InteractionScores[order(InteractionScores$Z_lm_L,decreasing=TRUE),] + InteractionScores <- InteractionScores[order(InteractionScores$NG,decreasing=TRUE),] + df_order_by_OrfRep <- unique(InteractionScores$OrfRep) + 
#X_stats_interaction_ALL <- X_stats_interaction_ALL[order(X_stats_interaction_ALL$NG,decreasing=TRUE),] + write.csv(InteractionScores,paste(outputpath,"ZScores_Interaction.csv",sep=""),row.names=FALSE) + + InteractionScores_deletion_enhancers_L <- InteractionScores[InteractionScores$Avg_Zscore_L >= 2,] + InteractionScores_deletion_enhancers_K <- InteractionScores[InteractionScores$Avg_Zscore_K <= -2,] + InteractionScores_deletion_suppressors_L <- InteractionScores[InteractionScores$Avg_Zscore_L <= -2,] + InteractionScores_deletion_suppressors_K <- InteractionScores[InteractionScores$Avg_Zscore_K >= 2,] + InteractionScores_deletion_enhancers_and_Suppressors_L <- InteractionScores[InteractionScores$Avg_Zscore_L >= 2 | InteractionScores$Avg_Zscore_L <= -2,] + InteractionScores_deletion_enhancers_and_Suppressors_K <- InteractionScores[InteractionScores$Avg_Zscore_K >= 2 | InteractionScores$Avg_Zscore_K <= -2,] + InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- InteractionScores[InteractionScores$Z_lm_L >= 2 & InteractionScores$Avg_Zscore_L <= -2,] + InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- InteractionScores[InteractionScores$Z_lm_L <= -2 & InteractionScores$Avg_Zscore_L >= 2,] + InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- InteractionScores[InteractionScores$Z_lm_K <= -2 & InteractionScores$Avg_Zscore_K >= 2,] + InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- InteractionScores[InteractionScores$Z_lm_K >= 2 & InteractionScores$Avg_Zscore_K <= -2,] + + InteractionScores_deletion_enhancers_L <- InteractionScores_deletion_enhancers_L[!is.na(InteractionScores_deletion_enhancers_L$OrfRep),] + InteractionScores_deletion_enhancers_K <- InteractionScores_deletion_enhancers_K[!is.na(InteractionScores_deletion_enhancers_K$OrfRep),] + InteractionScores_deletion_suppressors_L <- InteractionScores_deletion_suppressors_L[!is.na(InteractionScores_deletion_suppressors_L$OrfRep),] + 
InteractionScores_deletion_suppressors_K <- InteractionScores_deletion_suppressors_K[!is.na(InteractionScores_deletion_suppressors_K$OrfRep),] + InteractionScores_deletion_enhancers_and_Suppressors_L <- InteractionScores_deletion_enhancers_and_Suppressors_L[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L$OrfRep),] + InteractionScores_deletion_enhancers_and_Suppressors_K <- InteractionScores_deletion_enhancers_and_Suppressors_K[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K$OrfRep),] + InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L[!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L$OrfRep),] + InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L[!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L$OrfRep),] + InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K[!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K$OrfRep),] + InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K[!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K$OrfRep),] + + write.csv(InteractionScores_deletion_enhancers_L,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_L.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_enhancers_K,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_K.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_suppressors_L,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_L.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_suppressors_K,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_K.csv",sep=""),row.names=FALSE) + 
write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L,paste(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L,paste(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K,paste(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K,paste(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv",sep=""),row.names=FALSE) + + #get enhancers and suppressors for linear regression + InteractionScores_deletion_enhancers_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2,] + InteractionScores_deletion_enhancers_K_lm <- InteractionScores[InteractionScores$Z_lm_K <= -2,] + InteractionScores_deletion_suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L <= -2,] + InteractionScores_deletion_suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2,] + InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2 | InteractionScores$Z_lm_L <= -2,] + InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2 | InteractionScores$Z_lm_K <= -2,] + + InteractionScores_deletion_enhancers_L_lm <- InteractionScores_deletion_enhancers_L_lm[!is.na(InteractionScores_deletion_enhancers_L_lm$OrfRep),] + InteractionScores_deletion_enhancers_K_lm <- 
InteractionScores_deletion_enhancers_K_lm[!is.na(InteractionScores_deletion_enhancers_K_lm$OrfRep),] + InteractionScores_deletion_suppressors_L_lm <- InteractionScores_deletion_suppressors_L_lm[!is.na(InteractionScores_deletion_suppressors_L_lm$OrfRep),] + InteractionScores_deletion_suppressors_K_lm <- InteractionScores_deletion_suppressors_K_lm[!is.na(InteractionScores_deletion_suppressors_K_lm$OrfRep),] + InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores_deletion_enhancers_and_Suppressors_L_lm[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L_lm$OrfRep),] + InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores_deletion_enhancers_and_Suppressors_K_lm[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K_lm$OrfRep),] + + write.csv(InteractionScores_deletion_enhancers_L_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_L_lm.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_enhancers_K_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_K_lm.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_suppressors_L_lm,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_L_lm.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_suppressors_K_lm,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_K_lm.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv",sep=""),row.names=FALSE) + + + write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) + print('ln 1570 write StudyInfo.csv after ') + #write.table(Labels,file=paste("../Code/StudyInfo.txt"),sep="\t",row.names = FALSE) + + for(i in 1:num_genes){ + Gene_Sel <- 
unique(InteractionScores$OrfRep)[i] + X_ZCalculations <- X_stats_interaction_ALL[X_stats_interaction_ALL$OrfRep == Gene_Sel,] + X_Int_Scores <- InteractionScores[InteractionScores$OrfRep == Gene_Sel,] + + p_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-65,65)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_L,2))) + + annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_L,2))) + + annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + + theme_Publication() + + p_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-65,65)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) + + annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_K,2))) + + annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) 
+ + annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + + theme_Publication() + + p_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-0.65,0.65)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) + + annotate("text",x=1,y=0.25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_r,2))) + + annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) + + theme_Publication() + + p_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-6500,6500)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=3500,label = 
paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) + + annotate("text",x=1,y=2500,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_AUC,2))) + + annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) + + theme_Publication() + + + + if(i == 1){ + X_stats_interaction_ALL_final <- X_ZCalculations + } + if(i > 1){ + X_stats_interaction_ALL_final <- rbind(X_stats_interaction_ALL_final,X_ZCalculations) + } + } + print("Pass Int ggplot loop") + write.csv(X_stats_interaction_ALL_final,paste(outputpath,"ZScore_Calculations.csv",sep=""),row.names = FALSE) + + + + + + + Blank <- ggplot(X2_RF) + geom_blank() + + pdf(paste(outputpath,"InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE) + + X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise, + mean_L = mean(l,na.rm=TRUE), + median_L = median(l,na.rm=TRUE), + max_L = max(l,na.rm=TRUE), + min_L = min(l,na.rm=TRUE), + sd_L = sd(l,na.rm=TRUE), + mean_K = mean(K,na.rm=TRUE), + median_K = median(K,na.rm=TRUE), + max_K = max(K,na.rm=TRUE), + min_K = min(K,na.rm=TRUE), + sd_K = sd(K,na.rm=TRUE), + mean_r = mean(r,na.rm=TRUE), + median_r = median(r,na.rm=TRUE), + max_r = max(r,na.rm=TRUE), + min_r = min(r,na.rm=TRUE), + sd_r = sd(r,na.rm=TRUE), + mean_AUC = mean(AUC,na.rm=TRUE), + median_AUC = median(AUC,na.rm=TRUE), + max_AUC = max(AUC,na.rm=TRUE), + min_AUC = min(AUC,na.rm=TRUE), + sd_AUC = sd(AUC,na.rm=TRUE), + NG = sum(NG,na.rm=TRUE), + DB = sum(DB,na.rm=TRUE), + SM = sum(SM,na.rm=TRUE) + ) + + + L_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,l)) + 
geom_point(position="jitter",size=1) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + + annotate("text",x=-0.25,y=10,label="NG") + + annotate("text",x=-0.25,y=5,label="DB") + + annotate("text",x=-0.25,y=0,label="SM") + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10,label=X_stats_X2_RF$NG) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=5,label=X_stats_X2_RF$DB) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=0,label=X_stats_X2_RF$SM) + + theme_Publication() + + K_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,K)) + geom_point(position="jitter",size=1) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(-20,160)) + + annotate("text",x=-0.25,y=-5,label="NG") + + annotate("text",x=-0.25,y=-12.5,label="DB") + + annotate("text",x=-0.25,y=-20,label="SM") + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-5,label=X_stats_X2_RF$NG) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-12.5,label=X_stats_X2_RF$DB) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-20,label=X_stats_X2_RF$SM) + + theme_Publication() + + R_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,r)) + geom_point(position="jitter",size=1) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) 
mean(x) + sd(x), + geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + + annotate("text",x=-0.25,y=.9,label="NG") + + annotate("text",x=-0.25,y=.8,label="DB") + + annotate("text",x=-0.25,y=.7,label="SM") + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.9,label=X_stats_X2_RF$NG) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.8,label=X_stats_X2_RF$DB) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.7,label=X_stats_X2_RF$SM) + + theme_Publication() + + AUC_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,AUC)) + geom_point(position="jitter",size=1) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + + annotate("text",x=-0.25,y=11000,label="NG") + + annotate("text",x=-0.25,y=10000,label="DB") + + annotate("text",x=-0.25,y=9000,label="SM") + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=11000,label=X_stats_X2_RF$NG) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10000,label=X_stats_X2_RF$DB) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=9000,label=X_stats_X2_RF$SM) + + theme_Publication() + + + L_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)) + geom_boxplot() + + scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + 
coord_cartesian(ylim=c(0,160)) + + theme_Publication() + + K_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)) + geom_boxplot() + + scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + + theme_Publication() + + r_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)) + geom_boxplot() + + scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + + theme_Publication() + + AUC_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)) + geom_boxplot() + + scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + + theme_Publication() + + + grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2) + grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2) + + + + #plot the references + #grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4) + #grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2) + + #loop for grid arrange 4x3 + j <- rep(1,((num_genes)/3)-1) + for(n in 1:length(j)){ + j[n+1] <- n*3 + 1 + } + #loop for printing each plot + num <- 0 + for(m in 1:(round((num_genes)/3)-1)){ + num <- j[m] + grid.arrange(p_l[[num]],p_K[[num]],p_r[[num]],p_AUC[[num]],p_l[[num+1]],p_K[[num+1]],p_r[[num+1]],p_AUC[[num+1]],p_l[[num+2]],p_K[[num+2]],p_r[[num+2]],p_AUC[[num+2]],ncol=4,nrow=3) + #grid.arrange(p_l[[364]],p_K[[364]],p_r[[364]],p_l[[365]],p_K[[365]],p_r[[365]],p_l[[366]],p_K[[366]],p_r[[366]],ncol=3,nrow=3) + + + #p1[[num+3]],p_K[[num+3]],p_r[[num+3]],p1[[num+4]],p_K[[num+4]],p_r[[num+4]] + } + 
# ---- InteractionPlots.pdf: genes left over after the paging loop -----------
# The paging loop above leaves `num` at the first gene index of the last page
# it drew, so genes 1..(num + 2) are already on the device. The remaining
# genes are laid out three per page (one row of L, K, r and AUC panels per
# gene); short pages are padded with `Blank` so the grid stays 4 x 3.
# This draws the same pages as the former total_num == 1..5 special cases.
if (num_genes != (num + 2)) {
  total_num <- num_genes - (num + 2)
  if (total_num >= 1) {
    for (page_start in seq(num + 3, num_genes, by = 3)) {
      panels <- list()
      for (g in page_start:min(page_start + 2, num_genes)) {
        panels <- c(panels, list(p_l[[g]], p_K[[g]], p_r[[g]], p_AUC[[g]]))
      }
      panels <- c(panels, rep(list(Blank), 12 - length(panels)))
      do.call(grid.arrange, c(panels, list(ncol = 4, nrow = 3)))
    }
  }
}
dev.off()


# ---- RF_InteractionPlots.pdf: summary pages ---------------------------------
pdf(paste(outputpath,"RF_InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE)

# Per-concentration summary of X2_RF: mean/median/max/min/sd of l, K, r and
# AUC, plus the summed NG, DB and SM columns.
X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise,
                       mean_L = mean(l,na.rm=TRUE),
                       median_L = median(l,na.rm=TRUE),
                       max_L = max(l,na.rm=TRUE),
                       min_L = min(l,na.rm=TRUE),
                       sd_L = sd(l,na.rm=TRUE),
                       mean_K = mean(K,na.rm=TRUE),
                       median_K = median(K,na.rm=TRUE),
                       max_K = max(K,na.rm=TRUE),
                       min_K = min(K,na.rm=TRUE),
                       sd_K = sd(K,na.rm=TRUE),
                       mean_r = mean(r,na.rm=TRUE),
                       median_r = median(r,na.rm=TRUE),
                       max_r = max(r,na.rm=TRUE),
                       min_r = min(r,na.rm=TRUE),
                       sd_r = sd(r,na.rm=TRUE),
                       mean_AUC = mean(AUC,na.rm=TRUE),
                       median_AUC = median(AUC,na.rm=TRUE),
                       max_AUC = max(AUC,na.rm=TRUE),
                       min_AUC = min(AUC,na.rm=TRUE),
                       sd_AUC = sd(AUC,na.rm=TRUE),
                       NG = sum(NG,na.rm=TRUE),
                       DB = sum(DB,na.rm=TRUE),
                       SM = sum(SM,na.rm=TRUE)
)

# Jittered scatter of one growth parameter against concentration, with a red
# mean +/- SD error bar and the NG/DB/SM totals from X_stats_X2_RF as text.
#   base    - ggplot(X2_RF, aes(Conc_Num_Factor, <parameter>))
#   label   - parameter name used in the title ("L", "K", "r", "AUC")
#   y_range - y limits handed to coord_cartesian()
#   y_text  - y positions of the NG, DB and SM annotation rows, in that order
# NOTE(review): fun.y/fun.ymin/fun.ymax are kept as in the original call;
# they are deprecated spellings in recent ggplot2 releases - confirm the
# ggplot2 version this pipeline is pinned to before renaming them.
rf_scatter <- function(base, label, y_range, y_text) {
  conc_positions <- c(unique(X2_RF$Conc_Num_Factor))
  base + geom_point(position="jitter",size=1) +
    stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
                 geom = "errorbar",color="red") +
    stat_summary(fun.y = mean, geom = "point",color="red") +
    scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
    ggtitle(paste(s,"Scatter RF for",label,"with SD", sep = " ")) +
    coord_cartesian(ylim = y_range) +
    annotate("text",x=-0.25,y=y_text[1],label="NG") +
    annotate("text",x=-0.25,y=y_text[2],label="DB") +
    annotate("text",x=-0.25,y=y_text[3],label="SM") +
    annotate("text",x=conc_positions,y=y_text[1],label=X_stats_X2_RF$NG) +
    annotate("text",x=conc_positions,y=y_text[2],label=X_stats_X2_RF$DB) +
    annotate("text",x=conc_positions,y=y_text[3],label=X_stats_X2_RF$SM) +
    theme_Publication()
}

# Box plot of one growth parameter per concentration level.
#   base - ggplot(X2_RF, aes(as.factor(Conc_Num_Factor), <parameter>))
# The title text is the same as the scatter title, as in the original script.
rf_box <- function(base, label, y_range) {
  base + geom_boxplot() +
    scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
    ggtitle(paste(s,"Scatter RF for",label,"with SD", sep = " ")) +
    coord_cartesian(ylim = y_range) +
    theme_Publication()
}

L_Stats   <- rf_scatter(ggplot(X2_RF,aes(Conc_Num_Factor,l)),   "L",   c(0,130),   c(10,5,0))
K_Stats   <- rf_scatter(ggplot(X2_RF,aes(Conc_Num_Factor,K)),   "K",   c(-20,160), c(-5,-12.5,-20))
R_Stats   <- rf_scatter(ggplot(X2_RF,aes(Conc_Num_Factor,r)),   "r",   c(0,1),     c(.9,.8,.7))
AUC_Stats <- rf_scatter(ggplot(X2_RF,aes(Conc_Num_Factor,AUC)), "AUC", c(0,12500), c(11000,10000,9000))

L_Stats_Box   <- rf_box(ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)),   "L",   c(0,130))
K_Stats_Box   <- rf_box(ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)),   "K",   c(0,160))
r_Stats_Box   <- rf_box(ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)),   "r",   c(0,1))
# Was ylim=c(12000,0): reversed, and inconsistent with the AUC scatter above
# and with the AUC box plot written to InteractionPlots.pdf (both c(0,12500)).
AUC_Stats_Box <- rf_box(ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)), "AUC", c(0,12500))


grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2)
grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2)



#plot the references
#grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4)
#grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2)

# ---- RF_InteractionPlots.pdf: one row of panels per reference gene ----------
# Each page is a 4 x 3 grid: up to three genes, each contributing its L, K, r
# and AUC plot. A short final page is padded with `Blank` so the layout does
# not change. Chunking by three replaces the former index vector `j` plus the
# total_num == 1..5 special cases; it draws the same pages and also works
# when num_genes_RF is smaller than 6, where `1:(round(n/3)-1)` counted down
# from 1 to 0 and indexed j[0].
if (num_genes_RF >= 1) {
  for (num in seq(1, num_genes_RF, by = 3)) {
    panels <- list()
    for (g in num:min(num + 2, num_genes_RF)) {
      panels <- c(panels, list(p_rf_l[[g]], p_rf_K[[g]], p_rf_r[[g]], p_rf_AUC[[g]]))
    }
    panels <- c(panels, rep(list(Blank), 12 - length(panels)))
    do.call(grid.arrange, c(panels, list(ncol = 4, nrow = 3)))
  }
}
dev.off()

#print rank plots for L and K gene interactions


# Working copy of the interaction scores in which missing average Z scores
# are replaced by 0.001 before ranking.
InteractionScores_AdjustMissing <- InteractionScores
for (metric in c("L", "K", "r", "AUC")) {
  score_col <- paste0("Avg_Zscore_", metric)
  # Element-wise replacement: the former df[is.na(col),]$col <- 0.001 form
  # stops with "replacement has 1 row, data has 0" when a column has no NA.
  missing_score <- is.na(InteractionScores_AdjustMissing[[score_col]])
  InteractionScores_AdjustMissing[[score_col]][missing_score] <- 0.001
}

# Rank columns are added in the order L_Rank, K_Rank, r_Rank, AUC_Rank.
for (metric in c("L", "K", "r", "AUC")) {
  InteractionScores_AdjustMissing[[paste0(metric, "_Rank")]] <-
    rank(InteractionScores_AdjustMissing[[paste0("Avg_Zscore_", metric)]])
}

#
# ---- lm-based Z scores: fill missing values, then rank ----------------------
for (metric in c("L", "K", "r", "AUC")) {
  score_col <- paste0("Z_lm_", metric)
  # Element-wise replacement: the former df[is.na(col),]$col <- 0.001 form
  # stops with "replacement has 1 row, data has 0" when a column has no NA.
  missing_score <- is.na(InteractionScores_AdjustMissing[[score_col]])
  InteractionScores_AdjustMissing[[score_col]][missing_score] <- 0.001
}

# Rank columns are added in the order L_Rank_lm, K_Rank_lm, r_Rank_lm,
# AUC_Rank_lm.
for (metric in c("L", "K", "r", "AUC")) {
  InteractionScores_AdjustMissing[[paste0(metric, "_Rank_lm")]] <-
    rank(InteractionScores_AdjustMissing[[paste0("Z_lm_", metric)]])
}


# ---- Rank plots --------------------------------------------------------------
# Returns a function(sd_band, with_counts) that builds one "Z score vs. Rank"
# plot on top of `base`:
#   - shaded bands above +sd_band (purple) and below -sd_band (orange),
#   - horizontal lines at +/- sd_band, and the points themselves,
#   - optionally two text labels with the number of rows beyond each band.
# Arguments of the factory:
#   base           - ggplot(<data>, aes(<rank column>, <score column>))
#   scores         - the score column plotted on y (used for the counts)
#   title_word     - first word of the title ("Average" or "Interaction")
#   ylab_word      - first word of the y axis label ("Avg" or "Int")
#   metric         - parameter name shown in title and axis label ("L", "K")
#   enhancers_high - TRUE: "Deletion Enhancers" counts scores >= sd_band and
#                    is drawn at y = 10 (the L plots); FALSE: it counts
#                    scores <= -sd_band and is drawn at y = -10 (the K plots).
#                    "Deletion Suppressors" always takes the opposite side.
rank_plotter <- function(base, scores, title_word, ylab_word, metric, enhancers_high) {
  force(base)
  force(scores)
  function(sd_band, with_counts = TRUE) {
    p <- base +
      ggtitle(paste0(title_word, " Z score vs. Rank for ", metric, " above ", sd_band, "SD")) +
      xlab("Rank") + ylab(paste(ylab_word, "Z score", metric)) +
      annotate("rect",xmin = -Inf, xmax = Inf, ymin = sd_band,ymax = Inf,fill="#542788", alpha=0.3) +
      annotate("rect",xmin = -Inf, xmax = Inf, ymin = -sd_band,ymax = -Inf,fill="orange", alpha=0.3) +
      geom_hline(yintercept=c(-sd_band,sd_band)) + geom_point(size=0.1,shape=3)
    if (with_counts) {
      x_mid  <- length(scores)/2
      n_high <- sum(scores >= sd_band)
      n_low  <- sum(scores <= -sd_band)
      if (enhancers_high) {
        p <- p +
          annotate("text",x=x_mid,y=10,label=paste("Deletion Enhancers =",n_high)) +
          annotate("text",x=x_mid,y=-10,label=paste("Deletion Suppressors =",n_low))
      } else {
        p <- p +
          annotate("text",x=x_mid,y=-10,label=paste("Deletion Enhancers =",n_low)) +
          annotate("text",x=x_mid,y=10,label=paste("Deletion Suppressors =",n_high))
      }
    }
    p + theme_Publication()
  }
}

# Average Z score plots (written to RankPlots.pdf)
rank_plot_L_avg <- rank_plotter(ggplot(InteractionScores_AdjustMissing,aes(L_Rank,Avg_Zscore_L)),
                                InteractionScores_AdjustMissing$Avg_Zscore_L,
                                "Average", "Avg", "L", TRUE)
rank_plot_K_avg <- rank_plotter(ggplot(InteractionScores_AdjustMissing,aes(K_Rank,Avg_Zscore_K)),
                                InteractionScores_AdjustMissing$Avg_Zscore_K,
                                "Average", "Avg", "K", FALSE)

Rank_L_1SD <- rank_plot_L_avg(1)
Rank_L_2SD <- rank_plot_L_avg(2)
Rank_L_3SD <- rank_plot_L_avg(3)

Rank_K_1SD <- rank_plot_K_avg(1)
Rank_K_2SD <- rank_plot_K_avg(2)
Rank_K_3SD <- rank_plot_K_avg(3)

Rank_L_1SD_notext <- rank_plot_L_avg(1, with_counts = FALSE)
Rank_L_2SD_notext <- rank_plot_L_avg(2, with_counts = FALSE)
Rank_L_3SD_notext <- rank_plot_L_avg(3, with_counts = FALSE)

Rank_K_1SD_notext <- rank_plot_K_avg(1, with_counts = FALSE)
Rank_K_2SD_notext <- rank_plot_K_avg(2, with_counts = FALSE)
Rank_K_3SD_notext <- rank_plot_K_avg(3, with_counts = FALSE)


pdf(paste(outputpath,"RankPlots.pdf",sep=""),width = 18, height = 12, onefile = TRUE)

grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2)
grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2)

dev.off()


# lm (interaction) Z score plots (written to RankPlots_lm.pdf)
rank_plot_L_lm <- rank_plotter(ggplot(InteractionScores_AdjustMissing,aes(L_Rank_lm,Z_lm_L)),
                               InteractionScores_AdjustMissing$Z_lm_L,
                               "Interaction", "Int", "L", TRUE)
rank_plot_K_lm <- rank_plotter(ggplot(InteractionScores_AdjustMissing,aes(K_Rank_lm,Z_lm_K)),
                               InteractionScores_AdjustMissing$Z_lm_K,
                               "Interaction", "Int", "K", FALSE)

Rank_L_1SD_lm <- rank_plot_L_lm(1)
Rank_L_2SD_lm <- rank_plot_L_lm(2)
Rank_L_3SD_lm <- rank_plot_L_lm(3)

Rank_K_1SD_lm <- rank_plot_K_lm(1)
Rank_K_2SD_lm <- rank_plot_K_lm(2)
Rank_K_3SD_lm <- rank_plot_K_lm(3)

Rank_L_1SD_notext_lm <- rank_plot_L_lm(1, with_counts = FALSE)
Rank_L_2SD_notext_lm <- rank_plot_L_lm(2, with_counts = FALSE)
Rank_L_3SD_notext_lm <- rank_plot_L_lm(3, with_counts = FALSE)

Rank_K_1SD_notext_lm <- rank_plot_K_lm(1, with_counts = FALSE)
Rank_K_2SD_notext_lm <- rank_plot_K_lm(2, with_counts = FALSE)
Rank_K_3SD_notext_lm <- rank_plot_K_lm(3, with_counts = FALSE)

pdf(paste(outputpath,"RankPlots_lm.pdf",sep=""),width = 18, height = 12, onefile = TRUE)

grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2)
grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2)

dev.off()



# ---- Agreement between the lm Z score and the average Z score for L ---------
# Keep rows where at least one of the two L scores is present.
X_NArm <- InteractionScores[!is.na(InteractionScores$Z_lm_L) | !is.na(InteractionScores$Avg_Zscore_L) ,]

#find overlaps
# Rules are applied in order and later rules overwrite earlier ones.
# which() is used so that a rule with no matching rows, or with NA in one of
# the two scores, simply labels nothing; the former
# try(X_NArm[cond,]$Overlap <- ...) form raised an error in both situations
# and try() discarded the whole rule. A row whose comparison is NA matches no
# rule here.
# The "only" rules use strict inequalities on the non-significant side so a
# score exactly on the +/-2 threshold keeps its "Both" label instead of being
# overwritten (the original used <= 2 / >= -2 there).
overlap_lm  <- X_NArm$Z_lm_L
overlap_avg <- X_NArm$Avg_Zscore_L
X_NArm$Overlap <- "No Effect"
X_NArm$Overlap[which(overlap_lm >= 2  & overlap_avg >= 2)]  <- "Deletion Enhancer Both"
X_NArm$Overlap[which(overlap_lm <= -2 & overlap_avg <= -2)] <- "Deletion Suppressor Both"
X_NArm$Overlap[which(overlap_lm >= 2  & overlap_avg < 2)]   <- "Deletion Enhancer lm only"
X_NArm$Overlap[which(overlap_lm < 2   & overlap_avg >= 2)]  <- "Deletion Enhancer Avg Zscore only"
X_NArm$Overlap[which(overlap_lm <= -2 & overlap_avg > -2)]  <- "Deletion Suppressor lm only"
X_NArm$Overlap[which(overlap_lm > -2  & overlap_avg <= -2)] <- "Deletion Suppressor Avg Zscore only"
X_NArm$Overlap[which(overlap_lm >= 2  & overlap_avg <= -2)] <- "Deletion Enhancer lm, Deletion Suppressor Avg Z score"
X_NArm$Overlap[which(overlap_lm <= -2 & overlap_avg >= 2)]  <- "Deletion Suppressor lm, Deletion Enhancer Avg Z score"

#get the linear model info and the r-squared value for all CPPs in results 1 vs results 2
get_lm_L <- lm(X_NArm$Z_lm_L~X_NArm$Avg_Zscore_L)
L_lm <- summary(get_lm_L)

get_lm_K <- lm(X_NArm$Z_lm_K~X_NArm$Avg_Zscore_K)
+ K_lm <- summary(get_lm_K) + + get_lm_r <- lm(X_NArm$Z_lm_r~X_NArm$Avg_Zscore_r) + r_lm <- summary(get_lm_r) + + get_lm_AUC <- lm(X_NArm$Z_lm_AUC~X_NArm$Avg_Zscore_AUC) + AUC_lm <- summary(get_lm_AUC) + + pdf(paste(outputpath,"Avg_Zscore_vs_lm_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) + + print(ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Avg Zscore vs lm L") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(Avg_Zscore_K,Z_lm_K)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Avg Zscore vs lm K") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(K_lm$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(Avg_Zscore_r,Z_lm_r)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Avg Zscore vs lm r") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(r_lm$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(Avg_Zscore_AUC,Z_lm_AUC)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Avg Zscore vs lm AUC") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(AUC_lm$r.squared,2))) + theme_Publication_legend_right()) + + dev.off() + + + lm_v_Zscore_L <- 
ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L,ORF=OrfRep,Gene=Gene,NG=NG,SM=SM,DB=DB)) + geom_point(aes(color=Overlap),shape=3) + + geom_smooth(method = "lm",color=1) + ggtitle("Avg Zscore vs lm L") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right() + + pgg <- ggplotly(lm_v_Zscore_L) + #pgg + plotly_path <- paste(getwd(),"/",outputpath,"Avg_Zscore_vs_lm_NA_rm.html",sep="") + saveWidget(pgg, file=plotly_path, selfcontained =TRUE) + + X_NArm$L_Rank <- rank(X_NArm$Avg_Zscore_L) + X_NArm$K_Rank <- rank(X_NArm$Avg_Zscore_K) + X_NArm$r_Rank <- rank(X_NArm$Avg_Zscore_r) + X_NArm$AUC_Rank <- rank(X_NArm$Avg_Zscore_AUC) + + X_NArm$L_Rank_lm <- rank(X_NArm$Z_lm_L) + X_NArm$K_Rank_lm <- rank(X_NArm$Z_lm_K) + X_NArm$r_Rank_lm <- rank(X_NArm$Z_lm_r) + X_NArm$AUC_Rank_lm <- rank(X_NArm$Z_lm_AUC) + + #get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 + get_lm_L2 <- lm(X_NArm$L_Rank_lm~X_NArm$L_Rank) + L_lm2 <- summary(get_lm_L2) + + get_lm_K2 <- lm(X_NArm$K_Rank_lm~X_NArm$K_Rank) + K_lm2 <- summary(get_lm_K2) + + get_lm_r2 <- lm(X_NArm$r_Rank_lm~X_NArm$r_Rank) + r_lm2 <- summary(get_lm_r2) + + get_lm_AUC2 <- lm(X_NArm$AUC_Rank_lm~X_NArm$AUC_Rank) + AUC_lm2 <- summary(get_lm_AUC2) + + num_genes_NArm2 <- (dim(X_NArm)[1])/2 + + pdf(paste(outputpath,"Avg_Zscore_vs_lm_ranked_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) + + print(ggplot(X_NArm,aes(L_Rank,L_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Rank Avg Zscore vs lm L") + + annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(L_lm2$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(K_Rank,K_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + 
ggtitle("Rank Avg Zscore vs lm K") + + annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(K_lm2$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(r_Rank,r_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Rank Avg Zscore vs lm r") + + annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(r_lm2$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(AUC_Rank,AUC_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Rank of Avg Zscore vs lm AUC") + + annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(AUC_lm2$r.squared,2))) + theme_Publication_legend_right()) + + + + dev.off() + + + + Rank_L_1SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 1,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -1,])[1])) + + theme_Publication() + + Rank_L_2SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. 
Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 2,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -2,])[1])) + + theme_Publication() + + Rank_L_3SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 3,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -3,])[1])) + + theme_Publication() + + + Rank_K_1SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. 
Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -1,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 1,])[1])) + + theme_Publication() + + Rank_K_2SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -2,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 2,])[1])) + + theme_Publication() + + Rank_K_3SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -3,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 3,])[1])) + + theme_Publication() + + + Rank_L_1SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_L_2SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_L_3SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. 
Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + + Rank_K_1SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_K_2SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_K_3SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + + pdf(paste(outputpath,"RankPlots_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) + + grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2) + grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2) + + dev.off() + + + Rank_L_1SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 1,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -1,])[1])) + + theme_Publication() + + Rank_L_2SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. 
Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 2,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -2,])[1])) + + theme_Publication() + + Rank_L_3SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 3,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -3,])[1])) + + theme_Publication() + + + Rank_K_1SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. 
Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -1,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 1,])[1])) + + theme_Publication() + + Rank_K_2SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -2,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 2,])[1])) + + theme_Publication() + + Rank_K_3SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -3,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 3,])[1])) + + theme_Publication() + + + Rank_L_1SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_L_2SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_L_3SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. 
Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + + Rank_K_1SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_K_2SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_K_3SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") +
+    annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) +
+    annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) +
+    geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) +
+    theme_Publication()
+
+  pdf(paste(outputpath,"RankPlots_lm_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE)
+
+  grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2)
+  grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2)
+
+  dev.off()
+
+}
+
+
+
+#Correlation of the interaction z-scores between every pair of CPPs (L, K, r, AUC).
+#Fit one linear model per pair; only the r-squared of each summary is used below.
+get_lm_1 <- lm(X_NArm$Z_lm_K~X_NArm$Z_lm_L)
+L_lm_1 <- summary(get_lm_1)
+
+get_lm_2 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_L)
+L_lm_2 <- summary(get_lm_2)
+
+get_lm_3 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_L)
+L_lm_3 <- summary(get_lm_3)
+
+get_lm_4 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_K)
+L_lm_4 <- summary(get_lm_4)
+
+get_lm_5 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_K)
+L_lm_5 <- summary(get_lm_5)
+
+get_lm_6 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_r)
+L_lm_6 <- summary(get_lm_6)
+
+#Grid/axis styling shared by all correlation plots
+cpp_axis_theme <- theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18))
+
+#Build one CPP-vs-CPP scatter of interaction z-scores from X_NArm.
+#  mapping : aes() naming the x and y z-score columns
+#  x_cpp   : CPP name shown on the x axis and first in the title
+#  y_cpp   : CPP name shown on the y axis and second in the title
+#  fit     : summary.lm object; its r.squared is annotated at (0,0)
+#  overlay : optional data frame drawn on top as cyan points; when NULL a
+#            linear fit line is drawn instead
+#Returns the ggplot object (it is NOT printed here).
+plot_cpp_pair <- function(mapping, x_cpp, y_cpp, fit, overlay = NULL) {
+  p <- ggplot(X_NArm, mapping) + geom_point(shape=3,color="gray70")
+  if (is.null(overlay)) {
+    p <- p + geom_smooth(method="lm",color="tomato3")
+  } else {
+    p <- p + geom_point(data=overlay, mapping=mapping, color="cyan")
+  }
+  p + ggtitle(paste("Interaction", x_cpp, "vs. Interaction", y_cpp)) +
+    xlab(paste("z-score", x_cpp)) + ylab(paste("z-score", y_cpp)) +
+    annotate("text",x=0,y=0,label = paste("R-squared = ",round(fit$r.squared,3))) +
+    theme_Publication_legend_right() + cpp_axis_theme
+}
+
+#One entry per plotted pair, in page order
+cpp_pairs <- list(
+  list(mapping=aes(Z_lm_L,Z_lm_K),   x="L", y="K",   fit=L_lm_1),
+  list(mapping=aes(Z_lm_L,Z_lm_r),   x="L", y="r",   fit=L_lm_2),
+  list(mapping=aes(Z_lm_L,Z_lm_AUC), x="L", y="AUC", fit=L_lm_3),
+  list(mapping=aes(Z_lm_K,Z_lm_r),   x="K", y="r",   fit=L_lm_4),
+  list(mapping=aes(Z_lm_K,Z_lm_AUC), x="K", y="AUC", fit=L_lm_5),
+  list(mapping=aes(Z_lm_r,Z_lm_AUC), x="r", y="AUC", fit=L_lm_6)
+)
+
+#Rows of InteractionScores_RF that have an L interaction z-score; overlaid on pages 7-12
+InteractionScores_RF2 <- InteractionScores_RF[!is.na(InteractionScores_RF$Z_lm_L),]
+
+pdf(file=paste(outputpath,"Correlation_CPPs.pdf",sep=""),width = 10, height = 7, onefile = TRUE)
+
+#print() is required: plots created in a loop/function are not auto-printed,
+#and top-level autoprint is also lost when the script is run through source().
+#finally= closes the PDF device even if one of the plots fails.
+invisible(tryCatch({
+  #pages 1-6: scatter plus linear fit
+  for (pr in cpp_pairs) {
+    print(plot_cpp_pair(pr$mapping, pr$x, pr$y, pr$fit))
+  }
+  #pages 7-12: same scatter with the InteractionScores_RF2 rows highlighted
+  for (pr in cpp_pairs) {
+    print(plot_cpp_pair(pr$mapping, pr$x, pr$y, pr$fit, overlay=InteractionScores_RF2))
+  }
+}, finally = dev.off()))
+
+
+#write.csv(Labels,file=paste("../Code/Parameters.csv"),row.names = FALSE)
+timestamp()
+
+#BoneYard***********************************************
+#I'm thinking this parameter needs to be save somewhere "permanent' for the record so outputs can be reproduced.
+#take this out of the Arguments. In Matlab I could for future in .mat file.
Maybe I could save the SD Args[2] as part of the StudyInfo.txt.
+#Corruptable but better than nothing.
+#if(is.na(Args[2])){
+# std=3
+#}else {
+# std= Arg[2]
+#Delta_Background_sdFactor <- 2 #Args[3]
+#DelBGFactr <- as.numeric(Delta_Background_sdFactor)
+#}
+
diff --git a/workflow/.old/templates/qhtcp/Exp3/.DS_Store b/workflow/.old/templates/qhtcp/Exp3/.DS_Store
new file mode 100644
index 00000000..ccdc65b6
Binary files /dev/null and b/workflow/.old/templates/qhtcp/Exp3/.DS_Store differ
diff --git a/workflow/.old/templates/qhtcp/Exp3/.Rhistory b/workflow/.old/templates/qhtcp/Exp3/.Rhistory
new file mode 100644
index 00000000..e69de29b
diff --git a/workflow/.old/templates/qhtcp/Exp3/ExpFrontend.m b/workflow/.old/templates/qhtcp/Exp3/ExpFrontend.m
new file mode 100644
index 00000000..2bdc98c3
--- /dev/null
+++ b/workflow/.old/templates/qhtcp/Exp3/ExpFrontend.m
@@ -0,0 +1,71 @@
+%ExpFrontend.m (Exp3 template)
+%FrontEnd utility to copy the source result sheet into the Exp_ folders of
+%StudiesQHTCP/StudyName/Exp1(2,3,4). This allows the automation of path
+%capture to the StudiesDataArchive.txt study log.
+%Select, copy and capture Study Exp_ details to the study log.
+%Side effects: copies the selected file into the starting folder and appends
+%one record to StudiesDataArchive.txt two folders above it. On success the
+%working folder is left two levels up (as before); on failure it is reset to
+%the starting folder.
+
+%Exp meta data collection
+W=pwd;
+fid= -1; %log file handle, -1 while nothing is open
+try
+    %Label from the trailing digits of the folder name so Exp10+ works;
+    %fall back to the last character when the name has no trailing digits
+    expNum= regexp(W,'\d+$','match','once');
+    if isempty(expNum)
+        expNum= W(end);
+    end
+    ExpLabel= strcat('Exp',expNum)
+    questdlg('\fontsize{20} Select the !!Results File','File Selection','OK', struct('Default','OK','Interpreter','tex'));
+    [resFile,resPath]= uigetfile('*.txt')
+    if isequal(resFile,0) %uigetfile returns 0 when the dialog is cancelled
+        error('ExpFrontend:noFile','No results file was selected.');
+    end
+    copyfile(fullfile(resPath,resFile),W)
+    %first date-like token in the file name ('' when there is none)
+    resDate= char(regexp(resFile, '(\d\d\_\d\d\d\d)|( \d\d\_\d\d\d\d|\d\d\d\d\d\d)','match','once'))
+    cd ..
+    Wstudy= pwd
+    studyDate= datetime("today");
+    %study name = last component of the study path (any platform)
+    [~,nm,ext]= fileparts(Wstudy);
+    studyName= [nm ext]
+
+    S.sDate(1)= {studyDate};
+    S.sName(1)= {studyName}
+    S.sPath(1)= {Wstudy}
+    S.ELabel(1)= {ExpLabel}
+    S.EresDate(1)= {resDate}
+    S.EresFile(1)= {resFile}
+    S.EresPath(1)= {resPath}
+
+    cd ..
+
+    logName= 'StudiesDataArchive.txt';
+    logInfo= dir(logName);
+    needHeader= isempty(logInfo) || logInfo(1).bytes==0; %header only for a new/empty log
+    [fid,msg]= fopen(logName,'a');
+    if fid < 0
+        error('ExpFrontend:logOpen','Cannot open %s: %s',logName,msg);
+    end
+    if needHeader
+        fprintf(fid,'StudyDate\tStudyName\tStudyPath\tExpNum\tExpDate\tExpPath\tResultFile\n');
+    end
+    %char() the datetime - NOTE(review): %s is not expected to accept a datetime object directly; confirm
+    fprintf(fid, '%s\t %s\t %s\t %s\t %s\t %s\t %s \n',char(S.sDate{1}),S.sName{1},S.sPath{1},S.ELabel{1},S.EresDate{1},S.EresPath{1},S.EresFile{1});
+    fclose(fid);
+    fid= -1;
+    %save((fullfile(pwd,'studyLog.mat')), 'S')
+    fclose('all');
+
+catch ME
+    if fid >= 0
+        fclose(fid); %do not leak the log handle when a write fails
+    end
+    cd(W)
+    disp('Error: Unable to Execute ExpFrontend.m')
+    disp(ME.message)
+end
diff --git a/workflow/.old/templates/qhtcp/Exp3/NotesExp3 b/workflow/.old/templates/qhtcp/Exp3/NotesExp3
new file mode 100644
index 00000000..e69de29b
diff --git a/workflow/.old/templates/qhtcp/Exp3/Z_InteractionTemplate.R b/workflow/.old/templates/qhtcp/Exp3/Z_InteractionTemplate.R
new file mode 100644
index 00000000..fe31ca14
--- /dev/null
+++ b/workflow/.old/templates/qhtcp/Exp3/Z_InteractionTemplate.R
@@ -0,0 +1,2730 @@
+#Based on InteractionTemplate.R which is based on Sean Santose's Interaction_V5 script.
+#Adapt SS For Structured Data storage but using command line scripts
+###Set up the required libraries, call required plot theme elements and set up the command line arguments
+library("ggplot2")
+library("plyr")
+library("extrafont")
+library("gridExtra")
+library("gplots")
+library("RColorBrewer")
+library("stringr")
+#library("gdata")
+library(plotly)
+library(htmlwidgets)
+
+Args <- commandArgs(TRUE)
+input_file <- Args[1] #"!!Results_17_0827_yor1null-rpl12anull misLabeledAsFrom MI 17_0919_yor1-curated.txt" #Args[1] #Arg 1 #"!!ResultsStd_JS 19_1224_HLEG_P53.txt" is the !!results ...
.txt +#Tool to find a file and copy it to desired location +destDir= getwd() +#srcFile= file.choose() +#file.copy(srcFile, destDir) +#input_file= tail(strsplit(srcFile,"[/]")[[1]],1) + + + +#Path to Output Directory +#W=getwd() #R is F'd up, Can't use, Any legitamate platform could build out dirs from this +outDir <- "ZScores/" #"Args[2] #paste0(W,"/ZScores/") +subDir <- outDir #Args[2] + +if (file.exists(subDir)){ + outputpath <- subDir +} else { + dir.create(file.path(subDir)) +} + +if (file.exists(paste(subDir,"QC/",sep=""))){ + outputpath_QC <- paste(subDir,"QC/",sep="") +} else { + dir.create(file.path(paste(subDir,"QC/",sep=""))) + outputpath_QC <- paste(subDir,"QC/",sep="") +} +#define the output path (formerly the second argument from Rscript) +outputpath <- outDir + +#Set Args[2] the Background contamination noise filter as a function of standard deviation +#std= as.numeric(Args[2]) +#Sean recommends 3 or 5 SD factor. +#Capture Exp_ number,use it to Save Args[2]{std}to Labels field and then Write to Labels to studyInfo.txt for future reference +Labels <- read.csv(file= "../Code/StudyInfo.csv",stringsAsFactors = FALSE) #,sep= ",") +print("Be sure to enter Background noise filter standard deviation i.e., 3 or 5 per Sean") + +#User prompt for std multiplier Value +cat("Enter a Standard Deviation value to noise filter \n") +inpChar<- readLines(file("stdin"), n = 1L) +cat(paste("Standard Deviation Value is", inpChar, "\n")) +inpNum= as.numeric(inpChar) +#set std deviation multiplier default if no user entry +if(!is.na(inpNum)){ + std= inpNum +}else{std= 3} + + +expNumber<- as.numeric(sub("^.*?(\\d+)$", "\\1", getwd())) +Labels[expNumber,3]= as.numeric(std) +Delta_Background_sdFactor <- std +DelBGFactr <- as.numeric(Delta_Background_sdFactor) +#Write Background SD value to studyInfo.txt file +#write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) +write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) +print('ln 50 write 
StudyInfo.csv ') +#write.table(Labels,file=paste(outputpath,"StudyInfo.txt"),sep = "\t",row.names = FALSE) + +#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +#++++++BEGIN USER DATA SELECTION SECTION+++++++++++++++++++++++++++++++++++++++++++++++++ + +#read in the data +X <- read.delim(input_file,skip=2,as.is=T,row.names=1,strip.white=TRUE) +X <- X[!(X[[1]]%in%c("","Scan")),] +#X <- X[!(X[[1]]%in%c(61:76)),] #Remove dAmp plates which are Scans 61 thru 76 + +#X <- X[which(X$Specifics == "WT"),] + +#X_length <- length(X[1,]) +#X_end <- length(X[1,]) - 2 +#X <- X[,c(1:42,X_end:X_length)] + + +#use numeric data to perform operations +X$Col <- as.numeric(X$Col) +X$Row <- as.numeric(X$Row) +X$l <- as.numeric(X$l) +X$K <- as.numeric(X$K) +X$r <- as.numeric(X$r) +X$Scan <- as.numeric(X$Scan) +X$AUC <- as.numeric(X$AUC) +X$LstBackgrd <- as.numeric(X$LstBackgrd) +X$X1stBackgrd <- as.numeric(X$X1stBackgrd) + +#Sometimes the an experimenter may have placed the non-varying drug in the 'Drug' col instead of the 'Modifier1' col +#as was the case in Gemcitabin and Cytarabin experiments. +#The following allows user to rename columns so as to get the appropriate +#data where it needs to be for the script to run properly. +#colnames(X)[7] <- "Modifier1" +#colnames(X)[8] <- "Conc1" +#colnames(X)[10] <- "Drug" +#colnames(X)[11] <- "Conc" + +#set the OrfRep to YDL227C for the ref data +X[X$ORF == "YDL227C",]$OrfRep <- "YDL227C" +#Sean removes the Doxycyclin at 0.0ug.mL so that only the Oligomycin series with Doxycyclin of 0.12ug/mL are used. +#That is the first DM plates are removed from the data set with the following. +#X <- X[X$Conc1 != "0ug/ml",] #This removes data with dox ==0 leaving gene expression on with four different concentrations of Gemcytabin +X <- X[X$Drug != "BMH21",] #This removes data concerning BMH21 for this experiment + +#Mert placed the"bad_spot" text in the ORF col. for particular spots in the RF1 and RF2 plates. 
+#The (currently commented-out) line below would remove those spots from the data set used for the interaction analysis. Dr. Hartman feels that these do not affect Zscores significantly and so "non-curated" files were used. +#try(X <- X[X$ORF != "bad_spot",]) +#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++= + +#get total number of distinct drug concentration labels (unique values of X$Conc) +Total_Conc_Nums <- length(unique(X$Conc)) + +#function to pull the first number out of a string mixing characters+numbers (ie to get numeric drug conc); pattern = optional '-' sign(s), digits, optional '.', optional digits; result is still a character string +numextract <- function(string){ + str_extract(string, "\\-*\\d+\\.*\\d*") +} + +#generate a new column with the numeric drug concs (numextract result converted to numeric) +X$Conc_Num <- as.numeric(numextract(X$Conc)) +#Generate new column with the numeric drug concs as 0-based factor level codes (lowest conc = 0) for the graphing later +X$Conc_Num_Factor <- as.numeric(as.factor(X$Conc_Num)) - 1 + +#Get the max factor code for concentration +MAX_CONC <- max(X$Conc_Num_Factor) +#if treating numbers not as factors uncomment next line and comment out previous line +#MAX_CONC <- max(X$Conc_Num) + +#remove blank wells (any capitalization in Gene, plus "Blank" in ORF) so they are not graphed or included in summary statistics +X <- X[X$Gene != "BLANK",] +X <- X[X$Gene != "Blank",] +X <- X[X$ORF != "Blank",] +X <- X[X$Gene != "blank",] +#X <- X[X$Gene != "HO",] +Xbu= X +#Inserted to use SGDgenelist to update orfs and replace empty geneName cells with ORF name (adapted from Sean's Merge script). This is to 'fix' the naming for everything that follows (REMc, Heatmaps ... et al.) rather than do it piecemeal later +#Sean's Match Script (which was adapted here) was fixed 2022_0608 so as not to write over the RF1&RF2 geneNames which caused a variance with his code results +#in the Z_lm_L,K,r&AUC output values. Values correlated well but were off by a multiplier factor. 
+SGDgeneList= "../Code/SGD_features.tab" +genes = data.frame(read.delim(file=SGDgeneList,quote="",header=FALSE,colClasses = c(rep("NULL",3), rep("character", 2), rep("NULL", 11)))) +for(i in 1:length(X[,14])){ + ii= as.numeric(i) + line_num = match(X[ii,14],genes[,1],nomatch=1) + OrfRepColNum= as.numeric(match('OrfRep',names(X))) + if(X[ii,OrfRepColNum]!= "YDL227C"){ + X[ii,15] = genes[line_num,2] + } + if((X[ii,15] == "")||(X[ii,15] == "OCT1")){ + X[ii,15] = X[ii,OrfRepColNum] + } +} +Xblankreplace= X +#X= Xbu #for restore testing restore X if geneName 'Match' routine needs changing + +#Remove dAmPs ******************************* +#jlh confirmed to leave dAmps in so comment out this section +#DAmPs_List <- "../Code/22_0602_Remy_DAmPsList.txt" +#Damps <- read.delim(DAmPs_List,header=F) + +#X <- X[!(X$ORF %in% Damps$V1),] #fix this to Damps[,1] +#XafterDampsRM=X #Backup for debugging especially when Rstudio goes crazy out of control +# *********** + + +#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +#++++++END USER DATA SELECTION+++++++++++++++++++++++++++++++++++++++++++++++++ +#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +print("ln137 End of User Section including blank gene writeOver") +#++++Begin Graphics Boiler Plate Section+++++++++++++++++++++++++++++++++++++++ +#theme elements for plots +theme_Publication <- function(base_size=14, base_family="sans") { + library(grid) + library(ggthemes) + (theme_foundation(base_size=base_size, base_family=base_family) + + theme(plot.title = element_text(face = "bold", + size = rel(1.2), hjust = 0.5), + text = element_text(), + panel.background = element_rect(colour = NA), + plot.background = element_rect(colour = NA), + panel.border = element_rect(colour = NA), + axis.title = element_text(face = "bold",size = rel(1)), + axis.title.y = element_text(angle=90,vjust =2), + axis.title.x = element_text(vjust = -0.2), + axis.text = element_text(), + axis.line = 
element_line(colour="black"), + axis.ticks = element_line(), + panel.grid.major = element_line(colour="#f0f0f0"), + panel.grid.minor = element_blank(), + legend.key = element_rect(colour = NA), + legend.position = "bottom", + legend.direction = "horizontal", + legend.key.size= unit(0.2, "cm"), + legend.spacing = unit(0, "cm"), + legend.title = element_text(face="italic"), + plot.margin=unit(c(10,5,5,5),"mm"), + strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"), + strip.text = element_text(face="bold") + )) + +} + +scale_fill_Publication <- function(...){ + library(scales) + discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) + +} + +scale_colour_Publication <- function(...){ + discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) + +} + + +theme_Publication_legend_right <- function(base_size=14, base_family="sans") { + (theme_foundation(base_size=base_size, base_family=base_family) + + theme(plot.title = element_text(face = "bold", + size = rel(1.2), hjust = 0.5), + text = element_text(), + panel.background = element_rect(colour = NA), + plot.background = element_rect(colour = NA), + panel.border = element_rect(colour = NA), + axis.title = element_text(face = "bold",size = rel(1)), + axis.title.y = element_text(angle=90,vjust =2), + axis.title.x = element_text(vjust = -0.2), + axis.text = element_text(), + axis.line = element_line(colour="black"), + axis.ticks = element_line(), + panel.grid.major = element_line(colour="#f0f0f0"), + panel.grid.minor = element_blank(), + legend.key = element_rect(colour = NA), + legend.position = "right", + legend.direction = "vertical", + legend.key.size= unit(0.5, "cm"), + legend.spacing = unit(0, "cm"), + legend.title = element_text(face="italic"), + plot.margin=unit(c(10,5,5,5),"mm"), + 
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"), + strip.text = element_text(face="bold") + )) + +} + +scale_fill_Publication <- function(...){ + discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) + +} + +scale_colour_Publication <- function(...){ + discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) + +} + + +#print timestamp for initial time the code starts +timestamp() +#+++++BEGIN QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++ +###Part 2 - Quality control +#print quality control graphs for each dataset before removing data due to contamination +#and before adjusting missing data to max theoretical values + +#plate analysis plot +#plate analysis is a quality check to identify plate effects containing anomalies + +Plate_Analysis_L <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication() + +Plate_Analysis_K <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication() + +Plate_Analysis_r <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom 
= "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_Publication() + +Plate_Analysis_AUC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication() + + + +Plate_Analysis_L_Box <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication() + +Plate_Analysis_K_Box <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication() + +Plate_Analysis_r_Box <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_Publication() + +Plate_Analysis_AUC_Box <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication() + +#quality control - values with a high delta background likely have heavy contamination +#check the frequency of these values +#report the L and K values of these spots +#report the number to be removed based on the Delta_Background_Tolerance +X$Delta_Backgrd <- X$LstBackgrd - X$X1stBackgrd + + +#raw l vs K before QC +Raw_l_vs_K_beforeQC <- ggplot(X,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + + ggtitle("Raw L vs K before QC") + + theme_Publication_legend_right() +pdf(paste(outputpath_QC,"Raw_L_vs_K_beforeQC.pdf",sep=""),width = 12,height = 8) +Raw_l_vs_K_beforeQC +dev.off() +pgg <- ggplotly(Raw_l_vs_K_beforeQC) +#pgg 
+plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_beforeQC.html",sep="") +saveWidget(pgg, file=plotly_path, selfcontained =TRUE) + + +#set delta background tolerance based on 3 sds from the mean delta background +Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(DelBGFactr*sd(X$Delta_Backgrd)) +#Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(3*sd(X$Delta_Backgrd)) +print(paste("Delta_Background_Tolerance is",Delta_Background_Tolerance,sep=" ")) + +Plate_Analysis_Delta_Backgrd <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2,position="jitter") + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication() + +Plate_Analysis_Delta_Backgrd_Box <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication() + + + +X_Delta_Backgrd_above_Tolerance <- X[X$Delta_Backgrd >= Delta_Background_Tolerance,] + +X_Delta_Backgrd_above_Tolerance_K_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$K,na.rm = TRUE))/2 +X_Delta_Backgrd_above_Tolerance_L_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$l,na.rm = TRUE))/2 +X_Delta_Backgrd_above_Tolerance_toRemove <- dim(X_Delta_Backgrd_above_Tolerance)[1] + +X_Delta_Backgrd_above_Tolerance_L_vs_K <- ggplot(X_Delta_Backgrd_above_Tolerance,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + + ggtitle(paste("Raw L vs K for strains above delta background threshold of",Delta_Background_Tolerance,"or above")) + + annotate("text",x=X_Delta_Backgrd_above_Tolerance_L_halfmedian,y=X_Delta_Backgrd_above_Tolerance_K_halfmedian, + label = paste("Strains above delta 
background tolerance = ",X_Delta_Backgrd_above_Tolerance_toRemove)) + + theme_Publication_legend_right() +pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.pdf",sep=""),width = 12,height = 8) +X_Delta_Backgrd_above_Tolerance_L_vs_K +dev.off() +pgg <- ggplotly(X_Delta_Backgrd_above_Tolerance_L_vs_K) +#pgg +plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.html",sep="") +saveWidget(pgg, file=plotly_path, selfcontained =TRUE) + +#frequency plot for all data vs. the delta_background +DeltaBackground_Frequency_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_density() + + ggtitle("Density plot for Delta Background by Conc All Data") + theme_Publication_legend_right() + +#bar plot for all data vs. the delta_background +DeltaBackground_Bar_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_bar() + + ggtitle("Bar plot for Delta Background by Conc All Data") + theme_Publication_legend_right() + +pdf(file = paste(outputpath_QC,"Frequency_Delta_Background.pdf",sep=""),width = 12,height = 8) +print(DeltaBackground_Frequency_Plot) +print(DeltaBackground_Bar_Plot) +dev.off() + + +#Need to identify missing data, and differentiate between this data and removed data so the removed data can get set to NA and the missing data can get set to max theoretical values +#1 for missing data, 0 for non missing data +#Use "NG" for NoGrowth rather than "missing" +X$NG <- 0 +try(X[X$l == 0 & !is.na(X$l),]$NG <- 1) + +#1 for removed data, 0 non removed data +#Use DB to identify number of genes removed due to the DeltaBackground Threshold rather than "Removed" +X$DB <- 0 +try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$DB <- 1) + +#replace the CPPs for l, r, AUC and K (must be last!) 
for removed data +try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$l <- NA) +try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$r <- NA) +try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$AUC <- NA) +try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$K <- NA) + + +Plate_Analysis_L_afterQC <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() + +Plate_Analysis_K_afterQC <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() + +Plate_Analysis_r_afterQC <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() + +Plate_Analysis_AUC_afterQC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() + +Plate_Analysis_Delta_Backgrd_afterQC <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = 
mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() + + +Plate_Analysis_L_Box_afterQC <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() + +Plate_Analysis_K_Box_afterQC <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() + +Plate_Analysis_r_Box_afterQC <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() + +Plate_Analysis_AUC_Box_afterQC <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() + +Plate_Analysis_Delta_Backgrd_Box_afterQC <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() + +#print the plate analysis data before and after QC +pdf(file=paste(outputpath_QC,"Plate_Analysis.pdf",sep=""),width = 14,height=9) +Plate_Analysis_L +Plate_Analysis_L_afterQC +Plate_Analysis_K +Plate_Analysis_K_afterQC +Plate_Analysis_r +Plate_Analysis_r_afterQC +Plate_Analysis_AUC +Plate_Analysis_AUC_afterQC +Plate_Analysis_Delta_Backgrd +Plate_Analysis_Delta_Backgrd_afterQC +dev.off() + +#print the plate analysis data before and after QC +pdf(file=paste(outputpath_QC,"Plate_Analysis_Boxplots.pdf",sep=""),width = 18,height=9) +Plate_Analysis_L_Box +Plate_Analysis_L_Box_afterQC +Plate_Analysis_K_Box +Plate_Analysis_K_Box_afterQC +Plate_Analysis_r_Box 
+Plate_Analysis_r_Box_afterQC +Plate_Analysis_AUC_Box +Plate_Analysis_AUC_Box_afterQC +Plate_Analysis_Delta_Backgrd_Box +Plate_Analysis_Delta_Backgrd_Box_afterQC +dev.off() + +#remove the zero values and print plate analysis +X_noZero <- X[which(X$l > 0),] +Plate_Analysis_L_afterQC_Z <- ggplot(X_noZero,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() + +Plate_Analysis_K_afterQC_Z <- ggplot(X_noZero,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() + +Plate_Analysis_r_afterQC_Z <- ggplot(X_noZero,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() + +Plate_Analysis_AUC_afterQC_Z <- ggplot(X_noZero,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() + +Plate_Analysis_Delta_Backgrd_afterQC_Z <- ggplot(X_noZero,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + 
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() + + +Plate_Analysis_L_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() + +Plate_Analysis_K_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() + +Plate_Analysis_r_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() + +Plate_Analysis_AUC_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() + +Plate_Analysis_Delta_Backgrd_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() + +#print the plate analysis data before and after QC +pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros.pdf",sep=""),width = 14,height=9) +Plate_Analysis_L_afterQC_Z +Plate_Analysis_K_afterQC_Z +Plate_Analysis_r_afterQC_Z +Plate_Analysis_AUC_afterQC_Z +Plate_Analysis_Delta_Backgrd_afterQC_Z +dev.off() + +#print the plate analysis data before and after QC +pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros_Boxplots.pdf",sep=""),width = 18,height=9) +Plate_Analysis_L_Box_afterQC_Z +Plate_Analysis_K_Box_afterQC_Z +Plate_Analysis_r_Box_afterQC_Z +Plate_Analysis_AUC_Box_afterQC_Z 
+Plate_Analysis_Delta_Backgrd_Box_afterQC_Z +dev.off() + +#remove dataset with zeros removed +rm(X_noZero) + + +#X_test_missing_and_removed <- X[X$Removed == 1,] + +#calculate summary statistics for all strains, including both background and the deletions +X_stats_ALL <- ddply(X, c("Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean_L = mean(l,na.rm=TRUE), + median_L = median(l,na.rm=TRUE), + max_L = max(l,na.rm=TRUE), + min_L = min(l,na.rm=TRUE), + sd_L = sd(l,na.rm=TRUE), + se_L = sd_L / sqrt(N-1), + mean_K = mean(K,na.rm=TRUE), + median_K = median(K,na.rm=TRUE), + max_K = max(K,na.rm=TRUE), + min_K = min(K,na.rm=TRUE), + sd_K = sd(K,na.rm=TRUE), + se_K = sd_K / sqrt(N-1), + mean_r = mean(r,na.rm=TRUE), + median_r = median(r,na.rm=TRUE), + max_r = max(r,na.rm=TRUE), + min_r = min(r,na.rm=TRUE), + sd_r = sd(r,na.rm=TRUE), + se_r = sd_r / sqrt(N-1), + mean_AUC = mean(AUC,na.rm=TRUE), + median_AUC = median(AUC,na.rm=TRUE), + max_AUC = max(AUC,na.rm=TRUE), + min_AUC = min(AUC,na.rm=TRUE), + sd_AUC = sd(AUC,na.rm=TRUE), + se_AUC = sd_AUC / sqrt(N-1) +) +#print(X_stats_ALL_L) + +write.csv(X_stats_ALL,file=paste(outputpath,"SummaryStats_ALLSTRAINS.csv"),row.names = FALSE) +#+++++END QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +##### Part 3 - Generate summary statistics and calculate the max theoretical L value +##### Calculate the Z score at each drug conc for each deletion strain + + +#get the background strains - can be modified to take another argument but for most screens will just be YDL227C +Background_Strains <- c("YDL227C") + +#first part of loop will go through for each background strain +#most cases there will only be one YDL227C +for(s in Background_Strains){ + X_Background <- X[X$OrfRep == s,] + + #if there's missing data for the background strains set these values to NA so the 0 values aren't included in summary statistics + #we may want to consider in some cases giving the max high value to L depending on the data 
type + if(table(X_Background$l)[1] == 0){ + X_Background[X_Background$l == 0,]$l <- NA + X_Background[X_Background$K == 0,]$K <- NA + X_Background[X_Background$r == 0,]$r <- NA + X_Background[X_Background$AUC == 0,]$AUC <- NA + } + + X_Background <- X_Background[!is.na(X_Background$l),] + + #get summary stats for L, K, R, AUC + X_stats_BY_L <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean = mean(l,na.rm=TRUE), + median = median(l,na.rm=TRUE), + max = max(l,na.rm=TRUE), + min = min(l,na.rm=TRUE), + sd = sd(l,na.rm=TRUE), + se = sd / sqrt(N-1) + ) + print(X_stats_BY_L) + X1_SD <- max(X_stats_BY_L$sd) + + X_stats_BY_K <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, + N = (length(K)), + mean = mean(K,na.rm=TRUE), + median = median(K,na.rm=TRUE), + max = max(K,na.rm=TRUE), + min = min(K,na.rm=TRUE), + sd = sd(K,na.rm=TRUE), + se = sd / sqrt(N-1) + ) + + X1_SD_K <- max(X_stats_BY_K$sd) + + + X_stats_BY_r <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, + N = length(r), + mean = mean(r,na.rm=TRUE), + median = median(r,na.rm=TRUE), + max = max(r,na.rm=TRUE), + min = min(r,na.rm=TRUE), + sd = sd(r,na.rm=TRUE), + se = sd / sqrt(N-1) + ) + + X1_SD_r <- max(X_stats_BY_r$sd) + + X_stats_BY_AUC <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, + N = length(AUC), + mean = mean(AUC,na.rm=TRUE), + median = median(AUC,na.rm=TRUE), + max = max(AUC,na.rm=TRUE), + min = min(AUC,na.rm=TRUE), + sd = sd(AUC,na.rm=TRUE), + se = sd / sqrt(N-1) + ) + + X1_SD_AUC <- max(X_stats_BY_AUC$sd) + + X_stats_BY <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean_L = mean(l,na.rm=TRUE), + median_L = median(l,na.rm=TRUE), + max_L = max(l,na.rm=TRUE), + min_L = min(l,na.rm=TRUE), + sd_L = sd(l,na.rm=TRUE), + se_L = sd_L / sqrt(N-1), + mean_K = mean(K,na.rm=TRUE), + median_K = median(K,na.rm=TRUE), + max_K = 
max(K,na.rm=TRUE), + min_K = min(K,na.rm=TRUE), + sd_K = sd(K,na.rm=TRUE), + se_K = sd_K / sqrt(N-1), + mean_r = mean(r,na.rm=TRUE), + median_r = median(r,na.rm=TRUE), + max_r = max(r,na.rm=TRUE), + min_r = min(r,na.rm=TRUE), + sd_r = sd(r,na.rm=TRUE), + se_r = sd_r / sqrt(N-1), + mean_AUC = mean(AUC,na.rm=TRUE), + median_AUC = median(AUC,na.rm=TRUE), + max_AUC = max(AUC,na.rm=TRUE), + #AUC minimum gets its own column; it was previously labelled min_L, duplicating the L column name + min_AUC = min(AUC,na.rm=TRUE), + sd_AUC = sd(AUC,na.rm=TRUE), + se_AUC = sd_AUC / sqrt(N-1) + ) + + #NOTE(review): paste() is called without sep here, so its default separator (a space) is inserted between outputpath and the file name - confirm this is intended + write.csv(X_stats_BY,file=paste(outputpath,"SummaryStats_BackgroundStrains.csv"),row.names=FALSE) + + #calculate the max theoretical L values + #only look for max values when K is within 2SD of the ref strain + for(q in unique(X$Conc_Num_Factor)){ + if(q == 0){ + X_within_2SD_K <- X[X$Conc_Num_Factor == q,] + X_within_2SD_K <- X_within_2SD_K[!is.na(X_within_2SD_K$l),] + X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] + X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) ,] + X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] + + X_outside_2SD_K <- X[X$Conc_Num_Factor == q,] + X_outside_2SD_K <- X_outside_2SD_K[!is.na(X_outside_2SD_K$l),] + #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] + X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] + #X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] + + } + if(q > 0){ + X_within_2SD_K_temp <- X[X$Conc_Num_Factor == q,] + X_within_2SD_K_temp <- X_within_2SD_K_temp[!is.na(X_within_2SD_K_temp$l),] + X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] + X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])),] + 
X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])),] + X_within_2SD_K <- rbind(X_within_2SD_K,X_within_2SD_K_temp) + + X_outside_2SD_K_temp <- X[X$Conc_Num_Factor == q,] + X_outside_2SD_K_temp <- X_outside_2SD_K_temp[!is.na(X_outside_2SD_K_temp$l),] + #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] + X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] + #X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] + X_outside_2SD_K <- rbind(X_outside_2SD_K,X_outside_2SD_K_temp) + } + } + + X_stats_BY_L_within_2SD_K <- ddply(X_within_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean = mean(l), + median = median(l), + max = max(l,na.rm=TRUE), + min = min(l,na.rm=TRUE), + sd = sd(l), + se = sd / sqrt(N-1), + z_max = (max-mean)/sd + ) + print(X_stats_BY_L_within_2SD_K) + X1_SD_within_2SD_K <- max(X_stats_BY_L_within_2SD_K$sd) + write.csv(X_stats_BY_L_within_2SD_K,file=paste(outputpath_QC,"Max_Observed_L_Vals_for_spots_within_2SD_K.csv",sep=""),row.names=FALSE) + + X_stats_BY_L_outside_2SD_K <- ddply(X_outside_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean = mean(l), + median = median(l), + max = max(l,na.rm=TRUE), + min = min(l,na.rm=TRUE), + sd = sd(l), + se = sd / sqrt(N-1) + ) + print(X_stats_BY_L_outside_2SD_K) + X1_SD_outside_2SD_K <- max(X_stats_BY_L_outside_2SD_K$sd) + + #X1_SD_outside_2SD_K <- X[X$l %in% X1_SD_within_2SD_K$l,] + Outside_2SD_K_L_vs_K <- ggplot(X_outside_2SD_K,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + + ggtitle("Raw L vs K for strains falling outside 2SD of the K mean at each conc") + 
theme_Publication_legend_right() + pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8) + print(Outside_2SD_K_L_vs_K) + dev.off() + pgg <- ggplotly(Outside_2SD_K_L_vs_K) + plotly_path <- paste(getwd(),"/",outputpath_QC,"RawL_vs_K_for_strains_outside_2SD_K.html",sep="") + saveWidget(pgg, file=plotly_path, selfcontained =TRUE) + + + Outside_2SD_K_delta_background_vs_K <- ggplot(X_outside_2SD_K,aes(Delta_Backgrd,K,color=as.factor(Conc_Num))) + geom_point(aes(l=l,ORF=ORF,Gene=Gene),shape=3,position="jitter") + + ggtitle("DeltaBackground vs K for strains falling outside 2SD of the K mean at each conc") + theme_Publication_legend_right() + pdf(paste(outputpath_QC,"DeltaBackground_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8) + #explicit print() is needed: this code runs inside the background-strain for loop, where a bare ggplot object is not auto-printed and the pdf would be left empty + print(Outside_2SD_K_delta_background_vs_K) + dev.off() + pgg <- ggplotly(Outside_2SD_K_delta_background_vs_K) + #pgg + plotly_path <- paste(getwd(),"/",outputpath_QC,"DeltaBackground_vs_K_for_strains_outside_2SD_K.html",sep="") + saveWidget(pgg, file=plotly_path, selfcontained =TRUE) + + + #get the background strain mean values at the no drug conc to calculate shift + Background_L <- X_stats_BY_L$mean[1] + Background_K <- X_stats_BY_K$mean[1] + Background_r <- X_stats_BY_r$mean[1] + Background_AUC <- X_stats_BY_AUC$mean[1] + + #create empty plots for plotting element + p_l <- ggplot() + p_K <- ggplot() + p_r <- ggplot() + p_AUC <- ggplot() + + p_rf_l <- ggplot() + p_rf_K <- ggplot() + p_rf_r <- ggplot() + p_rf_AUC <- ggplot() + + #get only the deletion strains + X2 <- X + X2 <- X2[X2$OrfRep != "YDL227C",] + + #if set to max theoretical value, add a 1 to SM, if not, leave as 0 + #SM = Set to Max + X2$SM <- 0 + #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min + for(i in 1:length(unique(X2$Conc_Num))){ + Concentration <- unique(X2$Conc_Num)[i] + X2_temp <- X2[X2$Conc_Num == Concentration,] + if(Concentration == 
0){ + X2_new <- X2_temp + print(paste("Check loop order, conc =",Concentration,sep=" ")) + } + if(Concentration > 0){ + try(X2_temp[X2_temp$l == 0 & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) + try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$SM <- 1) + try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) + + #X2_temp[X2_temp$K == 0,]$K <- X_stats_ALL_K$max[i] + #X2_temp[X2_temp$r == 0,]$r <- X_stats_ALL_r$max[i] + #X2_temp[X2_temp$AUC == 0,]$AUC <- X_stats_ALL_AUC$max[i] + print(paste("Check loop order, conc =",Concentration,sep=" ")) + + X2_new <- rbind(X2_new,X2_temp) + + } + } + X2 <- X2_new + + + #get only the RF strains + X2_RF <- X + X2_RF <- X2_RF[X2_RF$OrfRep == "YDL227C",] + #if set to max theoretical value, add a 1 to SM, if not, leave as 0 + #SM = Set to Max + X2_RF$SM <- 0 + #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min + for(i in 1:length(unique(X2_RF$Conc_Num))){ + Concentration <- unique(X2_RF$Conc_Num)[i] + X2_RF_temp <- X2_RF[X2_RF$Conc_Num == Concentration,] + if(Concentration == 0){ + X2_RF_new <- X2_RF_temp + print(paste("Check loop order, conc =",Concentration,sep=" ")) + } + if(Concentration > 0){ + try(X2_RF_temp[X2_RF_temp$l == 0 & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) + #flag the reference rows that are set to max on X2_RF_temp itself; this line previously wrote to X2_temp, the leftover subset from the deletion-strain loop above, so SM was never set for the reference strains + try(X2_RF_temp[X2_RF_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_RF_temp$l),]$SM <- 1) + try(X2_RF_temp[X2_RF_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) + print(paste("Check loop order, if error, refs have no L values outside theoretical max L, for REFs, conc =",Concentration,sep=" ")) + + X2_RF_new <- rbind(X2_RF_new,X2_RF_temp) + + } + } + X2_RF <- X2_RF_new + + + #######Part 4 Get the RF Z score values + #Change the OrfRep Column to include the RF strain, the Gene name and the Num. 
so each RF gets its own score + X2_RF$OrfRep <- paste(X2_RF$OrfRep,X2_RF$Gene,X2_RF$Num.,sep="_") + + num_genes_RF <- length(unique(X2_RF$OrfRep)) + print(num_genes_RF) + + + #create the output data.frame containing columns for each RF strain + InteractionScores_RF <- unique(X2_RF["OrfRep"]) + #InteractionScores_RF$Gene <- unique(X2$Gene) + InteractionScores_RF$Gene <- NA + InteractionScores_RF$Raw_Shift_L <- NA + InteractionScores_RF$Z_Shift_L <- NA + InteractionScores_RF$lm_Score_L <- NA + InteractionScores_RF$Z_lm_L <- NA + InteractionScores_RF$R_Squared_L <- NA + InteractionScores_RF$Sum_Z_Score_L <- NA + InteractionScores_RF$Avg_Zscore_L <- NA + InteractionScores_RF$Raw_Shift_K <- NA + InteractionScores_RF$Z_Shift_K <- NA + InteractionScores_RF$lm_Score_K <- NA + InteractionScores_RF$Z_lm_K <- NA + InteractionScores_RF$R_Squared_K <- NA + InteractionScores_RF$Sum_Z_Score_K <- NA + InteractionScores_RF$Avg_Zscore_K <- NA + InteractionScores_RF$Raw_Shift_r <- NA + InteractionScores_RF$Z_Shift_r <- NA + InteractionScores_RF$lm_Score_r <- NA + InteractionScores_RF$Z_lm_r <- NA + InteractionScores_RF$R_Squared_r <- NA + InteractionScores_RF$Sum_Z_Score_r <- NA + InteractionScores_RF$Avg_Zscore_r <- NA + InteractionScores_RF$Raw_Shift_AUC <- NA + InteractionScores_RF$Z_Shift_AUC <- NA + InteractionScores_RF$lm_Score_AUC <- NA + InteractionScores_RF$Z_lm_AUC <- NA + InteractionScores_RF$R_Squared_AUC <- NA + InteractionScores_RF$Sum_Z_Score_AUC <- NA + InteractionScores_RF$Avg_Zscore_AUC <- NA + InteractionScores_RF$NG <- NA + InteractionScores_RF$SM <- NA + + + for(i in 1:num_genes_RF){ + #get each deletion strain ORF + Gene_Sel <- unique(X2_RF$OrfRep)[i] + #extract only the current deletion strain and its data + X_Gene_Sel <- X2_RF[X2_RF$OrfRep == Gene_Sel,] + + X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean_L = mean(l,na.rm = TRUE), + median_L = median(l,na.rm = TRUE), + sd_L = 
sd(l,na.rm = TRUE), + se_L = sd_L / sqrt(N-1), + mean_K = mean(K,na.rm = TRUE), + median_K = median(K,na.rm = TRUE), + sd_K = sd(K,na.rm = TRUE), + se_K = sd_K / sqrt(N-1), + mean_r = mean(r,na.rm = TRUE), + median_r = median(r,na.rm = TRUE), + sd_r = sd(r,na.rm = TRUE), + se_r = sd_r / sqrt(N-1), + mean_AUC = mean(AUC,na.rm = TRUE), + median_AUC = median(AUC,na.rm = TRUE), + sd_AUC = sd(AUC,na.rm = TRUE), + se_AUC = sd_AUC / sqrt(N-1), + NG = sum(NG,na.rm=TRUE), + DB= sum(DB,na.rm=TRUE), + SM = sum(SM,na.rm=TRUE) + ) + + + #Get shift vals + #if L is 0, that means the no growth on no drug + #if L is NA at 0, that means the spot was removed due to contamination + #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift + #otherwise calculate shift at no drug conc + if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){ + X_stats_interaction$Raw_Shift_L <- 0 + X_stats_interaction$Raw_Shift_K <- 0 + X_stats_interaction$Raw_Shift_r <- 0 + X_stats_interaction$Raw_Shift_AUC <- 0 + X_stats_interaction$Z_Shift_L <- 0 + X_stats_interaction$Z_Shift_K <- 0 + X_stats_interaction$Z_Shift_r <- 0 + X_stats_interaction$Z_Shift_AUC <- 0 + }else{ + X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L + X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K + X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r + X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC + X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1] + X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1] + X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1] + X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1] + } + + + #get WT vals + X_stats_interaction$WT_l <- X_stats_BY_L$mean + X_stats_interaction$WT_K 
<- X_stats_BY_K$mean + X_stats_interaction$WT_r <- X_stats_BY_r$mean + X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean + + #Get WT SD + X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd + X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd + X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd + X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd + + + #only get scores if there's growth at no drug + if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){ + + #calculate expected values + X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L + X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K + X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r + X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC + + #calculate normalized delta values + X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L + X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K + X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r + X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC + + #disregard shift for no growth values in Z score calculation + if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- 
X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC + + } + #disregard shift for set to max values in Z score calculation + if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l + #only calculate the L value without shift since L is the only adjusted value + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC + + } + + + + #calculate Z score at each concentration + X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) + X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) + X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) + X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) + + #get linear model + gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction) + gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction) + gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction) + gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction) + + #get interaction score calculated by linear model and R-squared value for the fit + gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] 
+ r_squared_l <- summary(gene_lm_L)$r.squared + gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] + r_squared_K <- summary(gene_lm_K)$r.squared + gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] + r_squared_r <- summary(gene_lm_r)$r.squared + #AUC score takes both slope and intercept from the AUC model (the slope was previously read from gene_lm_r) + gene_interaction_AUC <- MAX_CONC*(gene_lm_AUC$coefficients[2]) + gene_lm_AUC$coefficients[1] + r_squared_AUC <- summary(gene_lm_AUC)$r.squared + + #Get total of non removed values + Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1 + + #report the scores + InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] + InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] + + InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] + InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] + InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L + InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l + InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE) + InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc) + + + InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] + InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] + InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K + InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K + InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- 
sum(X_stats_interaction$Zscore_K,na.rm = TRUE) + InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc) + + InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] + InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] + InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r + InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r + InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE) + InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1) + + InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] + InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] + InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC + InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC + InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE) + InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1) + } + + if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){ + + #calculate expected values + X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L + X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K + X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r + 
X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC + + #calculate normalized delta values + X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L + X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K + X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r + X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC + + #disregard shift for missing values in Z score calculatiom + if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){ + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC + } + #disregard shift for set to max values in Z score calculation + if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l + #only calculate the L value without shift since L is the only adjusted value + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - 
X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC + + } + + + #calculate Z score at each concentration + X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) + X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) + X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) + X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) + + #NA values for the next part since there's an NA or 0 at the no drug. + gene_lm_L <- NA + gene_lm_K <- NA + gene_lm_r <- NA + gene_lm_AUC <- NA + + gene_interaction_L <- NA + r_squared_l <- NA + gene_interaction_K <- NA + r_squared_K <- NA + gene_interaction_r <- NA + r_squared_r <- NA + gene_interaction_AUC <- NA + r_squared_AUC <- NA + + X_stats_interaction$Raw_Shift_L <- NA + X_stats_interaction$Raw_Shift_K <- NA + X_stats_interaction$Raw_Shift_r <- NA + X_stats_interaction$Raw_Shift_AUC <- NA + + X_stats_interaction$Z_Shift_L <- NA + X_stats_interaction$Z_Shift_K <- NA + X_stats_interaction$Z_Shift_r <- NA + X_stats_interaction$Z_Shift_AUC <- NA + + InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] + InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] + + InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] + InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] + InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L + InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l + 
InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + + InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] + InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] + InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K + InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K + InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + + InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] + InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] + InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r + InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r + InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + + InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] + InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] + InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC + InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC + InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + } + + if(i == 1){ + X_stats_interaction_ALL_RF <- X_stats_interaction 
+ } + if(i > 1){ + X_stats_interaction_ALL_RF <- rbind(X_stats_interaction_ALL_RF,X_stats_interaction) + } + + InteractionScores_RF$NG[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE) + InteractionScores_RF$DB[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE) + InteractionScores_RF$SM[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE) + + #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int) + + + } + print("Pass RF Calculation loop") + + lm_sd_L <- sd(InteractionScores_RF$lm_Score_L,na.rm=TRUE) + lm_sd_K <- sd(InteractionScores_RF$lm_Score_K,na.rm=TRUE) + lm_sd_r <- sd(InteractionScores_RF$lm_Score_r,na.rm=TRUE) + lm_sd_AUC <- sd(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) + + lm_mean_L <- mean(InteractionScores_RF$lm_Score_L,na.rm=TRUE) + lm_mean_K <- mean(InteractionScores_RF$lm_Score_K,na.rm=TRUE) + lm_mean_r <- mean(InteractionScores_RF$lm_Score_r,na.rm=TRUE) + lm_mean_AUC <- mean(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) + + print(paste("Mean RF linear regression score L",lm_mean_L)) + + + InteractionScores_RF$Z_lm_L <- (InteractionScores_RF$lm_Score_L - lm_mean_L)/(lm_sd_L) + InteractionScores_RF$Z_lm_K <- (InteractionScores_RF$lm_Score_K - lm_mean_K)/(lm_sd_K) + InteractionScores_RF$Z_lm_r <- (InteractionScores_RF$lm_Score_r - lm_mean_r)/(lm_sd_r) + InteractionScores_RF$Z_lm_AUC <- (InteractionScores_RF$lm_Score_AUC - lm_mean_AUC)/(lm_sd_AUC) + + + InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$Z_lm_L,decreasing=TRUE),] + InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$NG,decreasing=TRUE),] + write.csv(InteractionScores_RF,paste(outputpath,"RF_ZScores_Interaction.csv",sep=""),row.names=FALSE) + + + for(i in 1:num_genes_RF){ + Gene_Sel <- unique(InteractionScores_RF$OrfRep)[i] + X_ZCalculations <- X_stats_interaction_ALL_RF[X_stats_interaction_ALL_RF$OrfRep == Gene_Sel,] + X_Int_Scores <- 
InteractionScores_RF[InteractionScores_RF$OrfRep == Gene_Sel,] + + p_rf_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-65,65)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=35,label = paste("Avg Zscore =",round(X_Int_Scores$Avg_Zscore_L,2))) + + annotate("text",x=1,y=25,label = paste("lm Zscore =",round(X_Int_Scores$Z_lm_L,2))) + + annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + + theme_Publication() + + p_rf_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-65,65)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) + + #the K panel is labelled with the K regression Z-score (Z_lm_K); it previously showed Z_lm_L + annotate("text",x=1,y=25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_K,2))) + + annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-45,label = paste("SM 
=",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + + theme_Publication() + + p_rf_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-0.65,0.65)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) + + annotate("text",x=1,y=0.25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_r,2))) + + annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) + + theme_Publication() + + p_rf_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-6500,6500)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) + + annotate("text",x=1,y=2500,label = paste("lm 
ZScore =",round(X_Int_Scores$Z_lm_AUC,2))) + + annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) + + theme_Publication() + + + + if(i == 1){ + X_stats_interaction_ALL_RF_final <- X_ZCalculations + } + if(i > 1){ + X_stats_interaction_ALL_RF_final <- rbind(X_stats_interaction_ALL_RF_final,X_ZCalculations) + } + } + print("Pass RF ggplot loop") + write.csv(X_stats_interaction_ALL_RF_final,paste(outputpath,"RF_ZScore_Calculations.csv",sep=""),row.names = FALSE) + + + ####### Part 5 - Get Zscores for Gene deletion strains + + + + #get total number of genes for the next loop + num_genes <- length(unique(X2$OrfRep)) + print(num_genes) + + #create the output data.frame containing columns for each deletion strain + InteractionScores <- unique(X2["OrfRep"]) + #InteractionScores$Gene <- unique(X2$Gene) + InteractionScores$Gene <- NA + InteractionScores$Raw_Shift_L <- NA + InteractionScores$Z_Shift_L <- NA + InteractionScores$lm_Score_L <- NA + InteractionScores$Z_lm_L <- NA + InteractionScores$R_Squared_L <- NA + InteractionScores$Sum_Z_Score_L <- NA + InteractionScores$Avg_Zscore_L <- NA + InteractionScores$Raw_Shift_K <- NA + InteractionScores$Z_Shift_K <- NA + InteractionScores$lm_Score_K <- NA + InteractionScores$Z_lm_K <- NA + InteractionScores$R_Squared_K <- NA + InteractionScores$Sum_Z_Score_K <- NA + InteractionScores$Avg_Zscore_K <- NA + InteractionScores$Raw_Shift_r <- NA + InteractionScores$Z_Shift_r <- NA + InteractionScores$lm_Score_r <- NA + InteractionScores$Z_lm_r <- NA + InteractionScores$R_Squared_r <- NA + InteractionScores$Sum_Z_Score_r <- NA + 
# Remaining placeholder columns of the per-strain score table. Every score
# column starts out as NA and is filled in, one strain at a time, by the loop
# below.
InteractionScores$Avg_Zscore_r <- NA
InteractionScores$Raw_Shift_AUC <- NA
InteractionScores$Z_Shift_AUC <- NA
InteractionScores$lm_Score_AUC <- NA
InteractionScores$Z_lm_AUC <- NA
InteractionScores$R_Squared_AUC <- NA
InteractionScores$Sum_Z_Score_AUC <- NA
InteractionScores$Avg_Zscore_AUC <- NA
InteractionScores$NG <- NA
InteractionScores$DB <- NA
InteractionScores$SM <- NA

# Score every deletion strain (one OrfRep per iteration).
#
# Reads:  X2, num_genes, Background_{L,K,r,AUC}, X_stats_BY_{L,K,r,AUC},
#         MAX_CONC, Total_Conc_Nums, lm_mean_* / lm_sd_* (all defined earlier
#         in the script).
# Writes: one row of InteractionScores per strain, and X_stats_interaction_ALL
#         (the per-concentration table of all strains, row-bound together).
for (i in seq_len(num_genes)) {
  # Current deletion strain ORF and only the rows that belong to it.
  Gene_Sel <- unique(X2$OrfRep)[i]
  X_Gene_Sel <- X2[X2$OrfRep == Gene_Sel, ]

  # Per-concentration summary statistics for this strain.
  # NOTE(review): se_* divides by sqrt(N-1) rather than sqrt(N); kept as in the
  # original because the intent cannot be confirmed from this section.
  X_stats_interaction <- ddply(
    X_Gene_Sel, c("OrfRep", "Gene", "Conc_Num", "Conc_Num_Factor"), summarise,
    N = (length(l)),
    mean_L = mean(l, na.rm = TRUE),
    median_L = median(l, na.rm = TRUE),
    sd_L = sd(l, na.rm = TRUE),
    se_L = sd_L / sqrt(N - 1),
    mean_K = mean(K, na.rm = TRUE),
    median_K = median(K, na.rm = TRUE),
    sd_K = sd(K, na.rm = TRUE),
    se_K = sd_K / sqrt(N - 1),
    mean_r = mean(r, na.rm = TRUE),
    median_r = median(r, na.rm = TRUE),
    sd_r = sd(r, na.rm = TRUE),
    se_r = sd_r / sqrt(N - 1),
    mean_AUC = mean(AUC, na.rm = TRUE),
    median_AUC = median(AUC, na.rm = TRUE),
    sd_AUC = sd(AUC, na.rm = TRUE),
    se_AUC = sd_AUC / sqrt(N - 1),
    NG = sum(NG, na.rm = TRUE),
    DB = sum(DB, na.rm = TRUE),
    SM = sum(SM, na.rm = TRUE)
  )

  # Row 1 is treated as the no-drug concentration. An L of 0 there means no
  # growth without drug; an NA there means the spot was removed (contamination).
  # Scores are only computed when there is growth at no drug.
  has_growth <- !is.na(X_stats_interaction$mean_L[1]) &&
    X_stats_interaction$mean_L[1] != 0

  # Shift values: offset of the strain from background at the no-drug
  # concentration. Without growth at no drug the shift is held at 0 so the
  # deltas for the other concentrations are calculated with no shift.
  if (has_growth) {
    X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L
    X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K
    X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r
    X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC
    X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1] / X_stats_BY_L$sd[1]
    X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1] / X_stats_BY_K$sd[1]
    X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1] / X_stats_BY_r$sd[1]
    X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1] / X_stats_BY_AUC$sd[1]
  } else {
    X_stats_interaction$Raw_Shift_L <- 0
    X_stats_interaction$Raw_Shift_K <- 0
    X_stats_interaction$Raw_Shift_r <- 0
    X_stats_interaction$Raw_Shift_AUC <- 0
    X_stats_interaction$Z_Shift_L <- 0
    X_stats_interaction$Z_Shift_K <- 0
    X_stats_interaction$Z_Shift_r <- 0
    X_stats_interaction$Z_Shift_AUC <- 0
  }

  # WT means and SDs per concentration (taken from the X_stats_BY_* tables).
  X_stats_interaction$WT_l <- X_stats_BY_L$mean
  X_stats_interaction$WT_K <- X_stats_BY_K$mean
  X_stats_interaction$WT_r <- X_stats_BY_r$mean
  X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean

  X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd
  X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd
  X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd
  X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd

  # Expected values: WT value plus the strain's shift.
  X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L
  X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K
  X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r
  X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC

  # Normalized deltas: observed mean minus expected value.
  X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L
  X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K
  X_stats_interaction$Delta_r <- X_stats_interaction$mean_r - X_stats_interaction$Exp_r
  X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC

  # Disregard the shift for no-growth rows (NG == 1): delta is taken against
  # the unshifted WT value for all four parameters.
  ng_rows <- X_stats_interaction$NG == 1
  if (any(ng_rows)) {
    X_stats_interaction$Delta_L[ng_rows] <- X_stats_interaction$mean_L[ng_rows] - X_stats_interaction$WT_l[ng_rows]
    X_stats_interaction$Delta_K[ng_rows] <- X_stats_interaction$mean_K[ng_rows] - X_stats_interaction$WT_K[ng_rows]
    X_stats_interaction$Delta_r[ng_rows] <- X_stats_interaction$mean_r[ng_rows] - X_stats_interaction$WT_r[ng_rows]
    X_stats_interaction$Delta_AUC[ng_rows] <- X_stats_interaction$mean_AUC[ng_rows] - X_stats_interaction$WT_AUC[ng_rows]
  }

  # Disregard the shift for set-to-max rows (SM == 1). Only L is recalculated
  # without shift since L is the only adjusted value.
  sm_rows <- X_stats_interaction$SM == 1
  if (any(sm_rows)) {
    X_stats_interaction$Delta_L[sm_rows] <- X_stats_interaction$mean_L[sm_rows] - X_stats_interaction$WT_l[sm_rows]
  }

  # Z score at each concentration: delta scaled by the WT SD.
  X_stats_interaction$Zscore_L <- X_stats_interaction$Delta_L / X_stats_interaction$WT_sd_l
  X_stats_interaction$Zscore_K <- X_stats_interaction$Delta_K / X_stats_interaction$WT_sd_K
  X_stats_interaction$Zscore_r <- X_stats_interaction$Delta_r / X_stats_interaction$WT_sd_r
  X_stats_interaction$Zscore_AUC <- X_stats_interaction$Delta_AUC / X_stats_interaction$WT_sd_AUC

  # Row of the score table that belongs to this strain.
  score_row <- InteractionScores$OrfRep == Gene_Sel

  if (has_growth) {
    # Linear model of delta versus concentration, one per parameter.
    gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor, data = X_stats_interaction)
    gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor, data = X_stats_interaction)
    gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor, data = X_stats_interaction)
    gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor, data = X_stats_interaction)

    # Interaction score = fitted value at MAX_CONC (slope * MAX_CONC + intercept),
    # plus the R-squared of each fit.
    gene_interaction_L <- MAX_CONC * (gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1]
    r_squared_l <- summary(gene_lm_L)$r.squared
    gene_interaction_K <- MAX_CONC * (gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1]
    r_squared_K <- summary(gene_lm_K)$r.squared
    gene_interaction_r <- MAX_CONC * (gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1]
    r_squared_r <- summary(gene_lm_r)$r.squared
    # Fix: the AUC score previously took its slope from gene_lm_r.
    gene_interaction_AUC <- MAX_CONC * (gene_lm_AUC$coefficients[2]) + gene_lm_AUC$coefficients[1]
    r_squared_AUC <- summary(gene_lm_AUC)$r.squared

    # Number of concentrations left after subtracting the DB count and one more
    # (kept exactly as in the original formula).
    Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB, na.rm = TRUE) - 1

    InteractionScores$lm_Score_L[score_row] <- gene_interaction_L
    InteractionScores$Z_lm_L[score_row] <- (gene_interaction_L - lm_mean_L) / lm_sd_L
    InteractionScores$Sum_Z_Score_L[score_row] <- sum(X_stats_interaction$Zscore_L, na.rm = TRUE)
    InteractionScores$Avg_Zscore_L[score_row] <- sum(X_stats_interaction$Zscore_L, na.rm = TRUE) / (Num_non_Removed_Conc)

    InteractionScores$lm_Score_K[score_row] <- gene_interaction_K
    InteractionScores$Z_lm_K[score_row] <- (gene_interaction_K - lm_mean_K) / lm_sd_K
    InteractionScores$Sum_Z_Score_K[score_row] <- sum(X_stats_interaction$Zscore_K, na.rm = TRUE)
    InteractionScores$Avg_Zscore_K[score_row] <- sum(X_stats_interaction$Zscore_K, na.rm = TRUE) / (Num_non_Removed_Conc)

    # NOTE(review): r and AUC average over Total_Conc_Nums - 1 while L and K use
    # Num_non_Removed_Conc. Left unchanged; confirm whether this is intended.
    InteractionScores$lm_Score_r[score_row] <- gene_interaction_r
    InteractionScores$Z_lm_r[score_row] <- (gene_interaction_r - lm_mean_r) / lm_sd_r
    InteractionScores$Sum_Z_Score_r[score_row] <- sum(X_stats_interaction$Zscore_r, na.rm = TRUE)
    InteractionScores$Avg_Zscore_r[score_row] <- sum(X_stats_interaction$Zscore_r, na.rm = TRUE) / (Total_Conc_Nums - 1)

    InteractionScores$lm_Score_AUC[score_row] <- gene_interaction_AUC
    InteractionScores$Z_lm_AUC[score_row] <- (gene_interaction_AUC - lm_mean_AUC) / lm_sd_AUC
    InteractionScores$Sum_Z_Score_AUC[score_row] <- sum(X_stats_interaction$Zscore_AUC, na.rm = TRUE)
    InteractionScores$Avg_Zscore_AUC[score_row] <- sum(X_stats_interaction$Zscore_AUC, na.rm = TRUE) / (Total_Conc_Nums - 1)
  } else {
    # NA or 0 at no drug: no model is fitted and every score is reported as NA.
    # Fix: the AUC variables are reset as well, so R_Squared_AUC no longer
    # inherits the previous strain's value (or fails on the first strain).
    gene_lm_L <- NA
    gene_lm_K <- NA
    gene_lm_r <- NA
    gene_lm_AUC <- NA

    gene_interaction_L <- NA
    r_squared_l <- NA
    gene_interaction_K <- NA
    r_squared_K <- NA
    gene_interaction_r <- NA
    r_squared_r <- NA
    gene_interaction_AUC <- NA
    r_squared_AUC <- NA

    # The zero shifts were only needed for the delta calculation above; they
    # are stored and reported as NA.
    X_stats_interaction$Raw_Shift_L <- NA
    X_stats_interaction$Raw_Shift_K <- NA
    X_stats_interaction$Raw_Shift_r <- NA
    X_stats_interaction$Raw_Shift_AUC <- NA

    X_stats_interaction$Z_Shift_L <- NA
    X_stats_interaction$Z_Shift_K <- NA
    X_stats_interaction$Z_Shift_r <- NA
    X_stats_interaction$Z_Shift_AUC <- NA

    for (score_col in c("lm_Score_L", "Z_lm_L", "Sum_Z_Score_L", "Avg_Zscore_L",
                        "lm_Score_K", "Z_lm_K", "Sum_Z_Score_K", "Avg_Zscore_K",
                        "lm_Score_r", "Z_lm_r", "Sum_Z_Score_r", "Avg_Zscore_r",
                        "lm_Score_AUC", "Z_lm_AUC", "Sum_Z_Score_AUC", "Avg_Zscore_AUC")) {
      InteractionScores[[score_col]][score_row] <- NA
    }
  }

  # Values reported the same way for both cases.
  InteractionScores$OrfRep[score_row] <- as.character(X_Gene_Sel$OrfRep[1])
  InteractionScores$Gene[score_row] <- as.character(X_Gene_Sel$Gene[1])

  InteractionScores$Raw_Shift_L[score_row] <- X_stats_interaction$Raw_Shift_L[1]
  InteractionScores$Z_Shift_L[score_row] <- X_stats_interaction$Z_Shift_L[1]
  InteractionScores$R_Squared_L[score_row] <- r_squared_l

  InteractionScores$Raw_Shift_K[score_row] <- X_stats_interaction$Raw_Shift_K[1]
  InteractionScores$Z_Shift_K[score_row] <- X_stats_interaction$Z_Shift_K[1]
  InteractionScores$R_Squared_K[score_row] <- r_squared_K

  InteractionScores$Raw_Shift_r[score_row] <- X_stats_interaction$Raw_Shift_r[1]
  InteractionScores$Z_Shift_r[score_row] <- X_stats_interaction$Z_Shift_r[1]
  InteractionScores$R_Squared_r[score_row] <- r_squared_r

  InteractionScores$Raw_Shift_AUC[score_row] <- X_stats_interaction$Raw_Shift_AUC[1]
  InteractionScores$Z_Shift_AUC[score_row] <- X_stats_interaction$Z_Shift_AUC[1]
  InteractionScores$R_Squared_AUC[score_row] <- r_squared_AUC

  # Accumulate the per-concentration table of every strain.
  if (i == 1) {
    X_stats_interaction_ALL <- X_stats_interaction
  }
  if (i > 1) {
    X_stats_interaction_ALL <- rbind(X_stats_interaction_ALL, X_stats_interaction)
  }

  # Totals of the NG / DB / SM flags across concentrations.
  InteractionScores$NG[score_row] <- sum(X_stats_interaction$NG, na.rm = TRUE)
  InteractionScores$DB[score_row] <- sum(X_stats_interaction$DB, na.rm = TRUE)
  InteractionScores$SM[score_row] <- sum(X_stats_interaction$SM, na.rm = TRUE)

  #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int)
}
print("Pass Int Calculation loop")

# Two consecutive sorts, kept as in the original: first by Z_lm_L (descending),
# then by NG (descending).
InteractionScores <- InteractionScores[order(InteractionScores$Z_lm_L, decreasing = TRUE), ]
InteractionScores <- InteractionScores[order(InteractionScores$NG, decreasing = TRUE), ]
df_order_by_OrfRep <- unique(InteractionScores$OrfRep)
#X_stats_interaction_ALL <- X_stats_interaction_ALL[order(X_stats_interaction_ALL$NG,decreasing=TRUE),]

# Full score table.
write.csv(InteractionScores, file = paste0(outputpath, "ZScores_Interaction.csv"), row.names = FALSE)

# Subsets of the score table at the +/-2 cut-offs. Each subset is selected and
# then stripped of the all-NA rows that a comparison against NA scores leaves
# behind (recognised by a missing OrfRep). Note the opposite sign convention
# for L and K: "enhancers" are L >= 2 but K <= -2.

# -- Avg_Zscore only ----------------------------------------------------------
InteractionScores_deletion_enhancers_L <-
  InteractionScores[with(InteractionScores, Avg_Zscore_L >= 2), ]
InteractionScores_deletion_enhancers_L <-
  InteractionScores_deletion_enhancers_L[!is.na(InteractionScores_deletion_enhancers_L[["OrfRep"]]), ]

InteractionScores_deletion_enhancers_K <-
  InteractionScores[with(InteractionScores, Avg_Zscore_K <= -2), ]
InteractionScores_deletion_enhancers_K <-
  InteractionScores_deletion_enhancers_K[!is.na(InteractionScores_deletion_enhancers_K[["OrfRep"]]), ]

InteractionScores_deletion_suppressors_L <-
  InteractionScores[with(InteractionScores, Avg_Zscore_L <= -2), ]
InteractionScores_deletion_suppressors_L <-
  InteractionScores_deletion_suppressors_L[!is.na(InteractionScores_deletion_suppressors_L[["OrfRep"]]), ]

InteractionScores_deletion_suppressors_K <-
  InteractionScores[with(InteractionScores, Avg_Zscore_K >= 2), ]
InteractionScores_deletion_suppressors_K <-
  InteractionScores_deletion_suppressors_K[!is.na(InteractionScores_deletion_suppressors_K[["OrfRep"]]), ]

# -- Either direction ---------------------------------------------------------
InteractionScores_deletion_enhancers_and_Suppressors_L <-
  InteractionScores[with(InteractionScores, Avg_Zscore_L >= 2 | Avg_Zscore_L <= -2), ]
InteractionScores_deletion_enhancers_and_Suppressors_L <-
  InteractionScores_deletion_enhancers_and_Suppressors_L[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L[["OrfRep"]]), ]

InteractionScores_deletion_enhancers_and_Suppressors_K <-
  InteractionScores[with(InteractionScores, Avg_Zscore_K >= 2 | Avg_Zscore_K <= -2), ]
InteractionScores_deletion_enhancers_and_Suppressors_K <-
  InteractionScores_deletion_enhancers_and_Suppressors_K[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K[["OrfRep"]]), ]

# -- lm score and Avg_Zscore pointing in opposite directions --------------------
InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <-
  InteractionScores[with(InteractionScores, Z_lm_L >= 2 & Avg_Zscore_L <= -2), ]
InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <-
  InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L[!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L[["OrfRep"]]), ]

InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <-
  InteractionScores[with(InteractionScores, Z_lm_L <= -2 & Avg_Zscore_L >= 2), ]
InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <-
  InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L[!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L[["OrfRep"]]), ]

InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <-
  InteractionScores[with(InteractionScores, Z_lm_K <= -2 & Avg_Zscore_K >= 2), ]
InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <-
  InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K[!is.na(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K[["OrfRep"]]), ]

InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <-
  InteractionScores[with(InteractionScores, Z_lm_K >= 2 & Avg_Zscore_K <= -2), ]
InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <-
  InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K[!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K[["OrfRep"]]), ]

# First four subset files (single-direction Avg_Zscore subsets).
write.csv(InteractionScores_deletion_enhancers_L, file = paste0(outputpath, "ZScores_Interaction_DeletionEnhancers_L.csv"), row.names = FALSE)
write.csv(InteractionScores_deletion_enhancers_K, file = paste0(outputpath, "ZScores_Interaction_DeletionEnhancers_K.csv"), row.names = FALSE)
write.csv(InteractionScores_deletion_suppressors_L, file = paste0(outputpath, "ZScores_Interaction_DeletionSuppressors_L.csv"), row.names = FALSE)
write.csv(InteractionScores_deletion_suppressors_K, file = paste0(outputpath, "ZScores_Interaction_DeletionSuppressors_K.csv"), row.names = FALSE)
write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L,paste(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L,paste(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K,paste(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K,paste(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv",sep=""),row.names=FALSE) + + #get enhancers and suppressors for linear regression + InteractionScores_deletion_enhancers_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2,] + InteractionScores_deletion_enhancers_K_lm <- InteractionScores[InteractionScores$Z_lm_K <= -2,] + InteractionScores_deletion_suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L <= -2,] + InteractionScores_deletion_suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2,] + InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores[InteractionScores$Z_lm_L >= 2 | InteractionScores$Z_lm_L <= -2,] + InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores[InteractionScores$Z_lm_K >= 2 | InteractionScores$Z_lm_K <= -2,] + + InteractionScores_deletion_enhancers_L_lm <- InteractionScores_deletion_enhancers_L_lm[!is.na(InteractionScores_deletion_enhancers_L_lm$OrfRep),] + InteractionScores_deletion_enhancers_K_lm <- 
InteractionScores_deletion_enhancers_K_lm[!is.na(InteractionScores_deletion_enhancers_K_lm$OrfRep),] + InteractionScores_deletion_suppressors_L_lm <- InteractionScores_deletion_suppressors_L_lm[!is.na(InteractionScores_deletion_suppressors_L_lm$OrfRep),] + InteractionScores_deletion_suppressors_K_lm <- InteractionScores_deletion_suppressors_K_lm[!is.na(InteractionScores_deletion_suppressors_K_lm$OrfRep),] + InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- InteractionScores_deletion_enhancers_and_Suppressors_L_lm[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_L_lm$OrfRep),] + InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- InteractionScores_deletion_enhancers_and_Suppressors_K_lm[!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K_lm$OrfRep),] + + write.csv(InteractionScores_deletion_enhancers_L_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_L_lm.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_enhancers_K_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_K_lm.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_suppressors_L_lm,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_L_lm.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_suppressors_K_lm,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_K_lm.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv",sep=""),row.names=FALSE) + write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv",sep=""),row.names=FALSE) + + + write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) + print('ln 1570 write StudyInfo.csv after ') + #write.table(Labels,file=paste("../Code/StudyInfo.txt"),sep="\t",row.names = FALSE) + + for(i in 1:num_genes){ + Gene_Sel <- 
unique(InteractionScores$OrfRep)[i] + X_ZCalculations <- X_stats_interaction_ALL[X_stats_interaction_ALL$OrfRep == Gene_Sel,] + X_Int_Scores <- InteractionScores[InteractionScores$OrfRep == Gene_Sel,] + + p_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-65,65)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_L,2))) + + annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_L,2))) + + annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + + theme_Publication() + + p_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-65,65)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) + + annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_K,2))) + + annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) 
+ + annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + + theme_Publication() + + p_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-0.65,0.65)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) + + annotate("text",x=1,y=0.25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_r,2))) + + annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) + + theme_Publication() + + p_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-6500,6500)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=3500,label = 
paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) + + annotate("text",x=1,y=2500,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_AUC,2))) + + annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) + + theme_Publication() + + + + if(i == 1){ + X_stats_interaction_ALL_final <- X_ZCalculations + } + if(i > 1){ + X_stats_interaction_ALL_final <- rbind(X_stats_interaction_ALL_final,X_ZCalculations) + } + } + print("Pass Int ggplot loop") + write.csv(X_stats_interaction_ALL_final,paste(outputpath,"ZScore_Calculations.csv",sep=""),row.names = FALSE) + + + + + + + Blank <- ggplot(X2_RF) + geom_blank() + + pdf(paste(outputpath,"InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE) + + X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise, + mean_L = mean(l,na.rm=TRUE), + median_L = median(l,na.rm=TRUE), + max_L = max(l,na.rm=TRUE), + min_L = min(l,na.rm=TRUE), + sd_L = sd(l,na.rm=TRUE), + mean_K = mean(K,na.rm=TRUE), + median_K = median(K,na.rm=TRUE), + max_K = max(K,na.rm=TRUE), + min_K = min(K,na.rm=TRUE), + sd_K = sd(K,na.rm=TRUE), + mean_r = mean(r,na.rm=TRUE), + median_r = median(r,na.rm=TRUE), + max_r = max(r,na.rm=TRUE), + min_r = min(r,na.rm=TRUE), + sd_r = sd(r,na.rm=TRUE), + mean_AUC = mean(AUC,na.rm=TRUE), + median_AUC = median(AUC,na.rm=TRUE), + max_AUC = max(AUC,na.rm=TRUE), + min_AUC = min(AUC,na.rm=TRUE), + sd_AUC = sd(AUC,na.rm=TRUE), + NG = sum(NG,na.rm=TRUE), + DB = sum(DB,na.rm=TRUE), + SM = sum(SM,na.rm=TRUE) + ) + + + L_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,l)) + 
geom_point(position="jitter",size=1) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + + annotate("text",x=-0.25,y=10,label="NG") + + annotate("text",x=-0.25,y=5,label="DB") + + annotate("text",x=-0.25,y=0,label="SM") + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10,label=X_stats_X2_RF$NG) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=5,label=X_stats_X2_RF$DB) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=0,label=X_stats_X2_RF$SM) + + theme_Publication() + + K_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,K)) + geom_point(position="jitter",size=1) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(-20,160)) + + annotate("text",x=-0.25,y=-5,label="NG") + + annotate("text",x=-0.25,y=-12.5,label="DB") + + annotate("text",x=-0.25,y=-20,label="SM") + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-5,label=X_stats_X2_RF$NG) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-12.5,label=X_stats_X2_RF$DB) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-20,label=X_stats_X2_RF$SM) + + theme_Publication() + + R_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,r)) + geom_point(position="jitter",size=1) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) 
mean(x) + sd(x), + geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + + annotate("text",x=-0.25,y=.9,label="NG") + + annotate("text",x=-0.25,y=.8,label="DB") + + annotate("text",x=-0.25,y=.7,label="SM") + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.9,label=X_stats_X2_RF$NG) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.8,label=X_stats_X2_RF$DB) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.7,label=X_stats_X2_RF$SM) + + theme_Publication() + + AUC_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,AUC)) + geom_point(position="jitter",size=1) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + + annotate("text",x=-0.25,y=11000,label="NG") + + annotate("text",x=-0.25,y=10000,label="DB") + + annotate("text",x=-0.25,y=9000,label="SM") + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=11000,label=X_stats_X2_RF$NG) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10000,label=X_stats_X2_RF$DB) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=9000,label=X_stats_X2_RF$SM) + + theme_Publication() + + + L_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)) + geom_boxplot() + + scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + 
coord_cartesian(ylim=c(0,160)) + + theme_Publication() + + K_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)) + geom_boxplot() + + scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + + theme_Publication() + + r_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)) + geom_boxplot() + + scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + + theme_Publication() + + AUC_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)) + geom_boxplot() + + scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) + + theme_Publication() + + + grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2) + grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2) + + + + #plot the references + #grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4) + #grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2) + + #loop for grid arrange 4x3 + j <- rep(1,((num_genes)/3)-1) + for(n in 1:length(j)){ + j[n+1] <- n*3 + 1 + } + #loop for printing each plot + num <- 0 + for(m in 1:(round((num_genes)/3)-1)){ + num <- j[m] + grid.arrange(p_l[[num]],p_K[[num]],p_r[[num]],p_AUC[[num]],p_l[[num+1]],p_K[[num+1]],p_r[[num+1]],p_AUC[[num+1]],p_l[[num+2]],p_K[[num+2]],p_r[[num+2]],p_AUC[[num+2]],ncol=4,nrow=3) + #grid.arrange(p_l[[364]],p_K[[364]],p_r[[364]],p_l[[365]],p_K[[365]],p_r[[365]],p_l[[366]],p_K[[366]],p_r[[366]],ncol=3,nrow=3) + + + #p1[[num+3]],p_K[[num+3]],p_r[[num+3]],p1[[num+4]],p_K[[num+4]],p_r[[num+4]] + } + 
# ---------------------------------------------------------------------------
# Finish InteractionPlots.pdf: print the genes that the preceding paging loop
# did not reach. `num` is the first gene index of the last full page printed
# by that loop, so genes up to num+2 are already on paper.
# Each page is a 4-column x 3-row grid: one row per gene (L, K, r, AUC), with
# unused rows filled by the empty `Blank` plot. Any positive remainder is
# handled, not just the remainders 1..5.
# ---------------------------------------------------------------------------
if (num_genes != (num + 2)) {
  total_num <- num_genes - (num + 2)
  if (total_num > 0) {
    leftover_genes <- (num + 3):num_genes
    for (page_start in seq(1, total_num, by = 3)) {
      page_genes <- leftover_genes[page_start:min(page_start + 2, total_num)]
      page_panels <- list()
      for (gene_idx in page_genes) {
        page_panels <- c(page_panels,
                         list(p_l[[gene_idx]], p_K[[gene_idx]], p_r[[gene_idx]], p_AUC[[gene_idx]]))
      }
      # Pad to the full 12 panels so every page keeps the same layout.
      while (length(page_panels) < 12) {
        page_panels <- c(page_panels, list(Blank))
      }
      do.call(grid.arrange, c(page_panels, list(ncol = 4, nrow = 3)))
    }
  }
}
dev.off()

# ---------------------------------------------------------------------------
# RF_InteractionPlots.pdf: first two pages are summary plots of X2_RF
# (scatter with mean +/- SD, then box plots) for l, K, r and AUC.
# ---------------------------------------------------------------------------
pdf(paste(outputpath,"RF_InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE)

# Per-concentration summary statistics of X2_RF; the NG/DB/SM sums are the
# counts printed underneath each scatter plot below.
X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise,
                       mean_L = mean(l,na.rm=TRUE),
                       median_L = median(l,na.rm=TRUE),
                       max_L = max(l,na.rm=TRUE),
                       min_L = min(l,na.rm=TRUE),
                       sd_L = sd(l,na.rm=TRUE),
                       mean_K = mean(K,na.rm=TRUE),
                       median_K = median(K,na.rm=TRUE),
                       max_K = max(K,na.rm=TRUE),
                       min_K = min(K,na.rm=TRUE),
                       sd_K = sd(K,na.rm=TRUE),
                       mean_r = mean(r,na.rm=TRUE),
                       median_r = median(r,na.rm=TRUE),
                       max_r = max(r,na.rm=TRUE),
                       min_r = min(r,na.rm=TRUE),
                       sd_r = sd(r,na.rm=TRUE),
                       mean_AUC = mean(AUC,na.rm=TRUE),
                       median_AUC = median(AUC,na.rm=TRUE),
                       max_AUC = max(AUC,na.rm=TRUE),
                       min_AUC = min(AUC,na.rm=TRUE),
                       sd_AUC = sd(AUC,na.rm=TRUE),
                       NG = sum(NG,na.rm=TRUE),
                       DB = sum(DB,na.rm=TRUE),
                       SM = sum(SM,na.rm=TRUE)
)

# Scatter plots: jittered points, red mean +/- SD error bar and red mean
# point per concentration, plus the NG/DB/SM counts as text rows.
L_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,l)) + geom_point(position="jitter",size=1) +
  stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
               geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") +
  scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
  ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) +
  annotate("text",x=-0.25,y=10,label="NG") +
  annotate("text",x=-0.25,y=5,label="DB") +
  annotate("text",x=-0.25,y=0,label="SM") +
  annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10,label=X_stats_X2_RF$NG) +
  annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=5,label=X_stats_X2_RF$DB) +
  annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=0,label=X_stats_X2_RF$SM) +
  theme_Publication()

K_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,K)) + geom_point(position="jitter",size=1) +
  stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
               geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") +
  scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
  ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(-20,160)) +
  annotate("text",x=-0.25,y=-5,label="NG") +
  annotate("text",x=-0.25,y=-12.5,label="DB") +
  annotate("text",x=-0.25,y=-20,label="SM") +
  annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-5,label=X_stats_X2_RF$NG) +
  annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-12.5,label=X_stats_X2_RF$DB) +
  annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-20,label=X_stats_X2_RF$SM) +
  theme_Publication()

R_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,r)) + geom_point(position="jitter",size=1) +
  stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
               geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") +
  scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
  ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) +
  annotate("text",x=-0.25,y=.9,label="NG") +
  annotate("text",x=-0.25,y=.8,label="DB") +
  annotate("text",x=-0.25,y=.7,label="SM") +
  annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.9,label=X_stats_X2_RF$NG) +
  annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.8,label=X_stats_X2_RF$DB) +
  annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.7,label=X_stats_X2_RF$SM) +
  theme_Publication()

AUC_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,AUC)) + geom_point(position="jitter",size=1) +
  stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
               geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") +
  scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
  ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) +
  annotate("text",x=-0.25,y=11000,label="NG") +
  annotate("text",x=-0.25,y=10000,label="DB") +
  annotate("text",x=-0.25,y=9000,label="SM") +
  annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=11000,label=X_stats_X2_RF$NG) +
  annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10000,label=X_stats_X2_RF$DB) +
  annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=9000,label=X_stats_X2_RF$SM) +
  theme_Publication()

# Box plots of the same four parameters per concentration.
# NOTE(review): the titles still say "Scatter ... with SD" although these are
# box plots; left untouched because they are runtime strings - confirm intent.
L_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)) + geom_boxplot() +
  scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
  ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) +
  theme_Publication()

K_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)) + geom_boxplot() +
  scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
  ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) +
  theme_Publication()

r_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)) + geom_boxplot() +
  scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
  ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) +
  theme_Publication()

# The y limits were previously written as c(12000,0) (reversed); they now
# match the AUC scatter plot above so both panels share the same window.
AUC_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)) + geom_boxplot() +
  scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
  ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) +
  theme_Publication()

# Page 1: scatter summaries; page 2: box plots.
grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2)
grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2)

#plot the references
#grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4)
#grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2)

# ---------------------------------------------------------------------------
# Print the per-gene reference plots into the open PDF device, three genes per
# page. Each page is a 4-column x 3-row grid: one row per gene (L, K, r, AUC),
# with unused rows on the last page filled by the empty `Blank` plot.
# Walking the gene indices in steps of three gives the same pages as the
# former index-table loop plus its remainder branches, and also works when
# there are fewer than five genes (where `1:0` used to iterate backwards).
# ---------------------------------------------------------------------------
if (num_genes_RF > 0) {
  for (page_start in seq(1, num_genes_RF, by = 3)) {
    page_genes <- page_start:min(page_start + 2, num_genes_RF)
    page_panels <- list()
    for (gene_idx in page_genes) {
      page_panels <- c(page_panels,
                       list(p_rf_l[[gene_idx]], p_rf_K[[gene_idx]],
                            p_rf_r[[gene_idx]], p_rf_AUC[[gene_idx]]))
    }
    # Pad to the full 12 panels so every page keeps the same layout.
    while (length(page_panels) < 12) {
      page_panels <- c(page_panels, list(Blank))
    }
    do.call(grid.arrange, c(page_panels, list(ncol = 4, nrow = 3)))
  }
}
dev.off()

#print rank plots for L and K gene interactions

# Work on a copy so InteractionScores itself keeps its NA values.
InteractionScores_AdjustMissing <- InteractionScores

# Replace missing average Z scores with 0.001 so every gene receives a rank.
# Indexing the column vector (rather than assigning into a row subset of the
# data frame) is a no-op when a column has no NA; the row-subset form raises
# an error in that case.
InteractionScores_AdjustMissing$Avg_Zscore_L[is.na(InteractionScores_AdjustMissing$Avg_Zscore_L)] <- 0.001
InteractionScores_AdjustMissing$Avg_Zscore_K[is.na(InteractionScores_AdjustMissing$Avg_Zscore_K)] <- 0.001
InteractionScores_AdjustMissing$Avg_Zscore_r[is.na(InteractionScores_AdjustMissing$Avg_Zscore_r)] <- 0.001
InteractionScores_AdjustMissing$Avg_Zscore_AUC[is.na(InteractionScores_AdjustMissing$Avg_Zscore_AUC)] <- 0.001

# Rank each gene by its average Z score (rank() default: ascending, ties averaged).
InteractionScores_AdjustMissing$L_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_L)
InteractionScores_AdjustMissing$K_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_K)
InteractionScores_AdjustMissing$r_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_r)
InteractionScores_AdjustMissing$AUC_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_AUC)
# Replace missing linear-model Z scores with 0.001 so every gene receives a
# rank. Column-vector indexing is a no-op when nothing is NA, whereas
# assigning into an empty row subset of the data frame raises an error.
InteractionScores_AdjustMissing$Z_lm_L[is.na(InteractionScores_AdjustMissing$Z_lm_L)] <- 0.001
InteractionScores_AdjustMissing$Z_lm_K[is.na(InteractionScores_AdjustMissing$Z_lm_K)] <- 0.001
InteractionScores_AdjustMissing$Z_lm_r[is.na(InteractionScores_AdjustMissing$Z_lm_r)] <- 0.001
InteractionScores_AdjustMissing$Z_lm_AUC[is.na(InteractionScores_AdjustMissing$Z_lm_AUC)] <- 0.001

# Rank each gene by its linear-model Z score (ascending, ties averaged).
InteractionScores_AdjustMissing$L_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_L)
InteractionScores_AdjustMissing$K_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_K)
InteractionScores_AdjustMissing$r_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_r)
InteractionScores_AdjustMissing$AUC_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_AUC)

# Build one "Z score vs. rank" plot.
#   scores        data frame holding the rank and Z-score columns
#   rank_col      name of the rank column (x axis)
#   z_col         name of the Z-score column (y axis)
#   title_prefix  leading part of the title ("Average Z score" / "Interaction Z score")
#   ylab_prefix   leading part of the y label ("Avg Z score" / "Int Z score")
#   cpp           parameter letter appended to title and y label ("L" or "K")
#   n_sd          threshold; bands are shaded above +n_sd and below -n_sd
#   enhancer_sign +1 when scores >= +n_sd are counted as "Deletion Enhancers"
#                 (used for L), -1 when scores <= -n_sd are (used for K)
#   with_counts   add the enhancer / suppressor count labels at y = +/-10
# Returns the ggplot object; layers are added in the same order as before.
make_rank_plot <- function(scores, rank_col, z_col, title_prefix, ylab_prefix,
                           cpp, n_sd, enhancer_sign = 1, with_counts = TRUE) {
  plot_data <- data.frame(rank_value = scores[[rank_col]], z_value = scores[[z_col]])
  p <- ggplot(plot_data, aes(rank_value, z_value)) +
    ggtitle(paste0(title_prefix, " vs. Rank for ", cpp, " above ", n_sd, "SD")) +
    xlab("Rank") + ylab(paste(ylab_prefix, cpp)) +
    annotate("rect", xmin = -Inf, xmax = Inf, ymin = n_sd, ymax = Inf, fill = "#542788", alpha = 0.3) +
    annotate("rect", xmin = -Inf, xmax = Inf, ymin = -n_sd, ymax = -Inf, fill = "orange", alpha = 0.3) +
    geom_hline(yintercept = c(-n_sd, n_sd)) + geom_point(size = 0.1, shape = 3)
  if (with_counts) {
    n_above <- sum(plot_data$z_value >= n_sd, na.rm = TRUE)
    n_below <- sum(plot_data$z_value <= -n_sd, na.rm = TRUE)
    n_enhancers <- if (enhancer_sign > 0) n_above else n_below
    n_suppressors <- if (enhancer_sign > 0) n_below else n_above
    x_mid <- nrow(scores) / 2
    p <- p +
      annotate("text", x = x_mid, y = 10 * enhancer_sign, label = paste("Deletion Enhancers =", n_enhancers)) +
      annotate("text", x = x_mid, y = -10 * enhancer_sign, label = paste("Deletion Suppressors =", n_suppressors))
  }
  p + theme_Publication()
}

# --- Average Z score rank plots (with and without count labels) -------------
Rank_L_1SD <- make_rank_plot(InteractionScores_AdjustMissing, "L_Rank", "Avg_Zscore_L", "Average Z score", "Avg Z score", "L", 1)
Rank_L_2SD <- make_rank_plot(InteractionScores_AdjustMissing, "L_Rank", "Avg_Zscore_L", "Average Z score", "Avg Z score", "L", 2)
Rank_L_3SD <- make_rank_plot(InteractionScores_AdjustMissing, "L_Rank", "Avg_Zscore_L", "Average Z score", "Avg Z score", "L", 3)

Rank_K_1SD <- make_rank_plot(InteractionScores_AdjustMissing, "K_Rank", "Avg_Zscore_K", "Average Z score", "Avg Z score", "K", 1, enhancer_sign = -1)
Rank_K_2SD <- make_rank_plot(InteractionScores_AdjustMissing, "K_Rank", "Avg_Zscore_K", "Average Z score", "Avg Z score", "K", 2, enhancer_sign = -1)
Rank_K_3SD <- make_rank_plot(InteractionScores_AdjustMissing, "K_Rank", "Avg_Zscore_K", "Average Z score", "Avg Z score", "K", 3, enhancer_sign = -1)

Rank_L_1SD_notext <- make_rank_plot(InteractionScores_AdjustMissing, "L_Rank", "Avg_Zscore_L", "Average Z score", "Avg Z score", "L", 1, with_counts = FALSE)
Rank_L_2SD_notext <- make_rank_plot(InteractionScores_AdjustMissing, "L_Rank", "Avg_Zscore_L", "Average Z score", "Avg Z score", "L", 2, with_counts = FALSE)
Rank_L_3SD_notext <- make_rank_plot(InteractionScores_AdjustMissing, "L_Rank", "Avg_Zscore_L", "Average Z score", "Avg Z score", "L", 3, with_counts = FALSE)

Rank_K_1SD_notext <- make_rank_plot(InteractionScores_AdjustMissing, "K_Rank", "Avg_Zscore_K", "Average Z score", "Avg Z score", "K", 1, with_counts = FALSE)
Rank_K_2SD_notext <- make_rank_plot(InteractionScores_AdjustMissing, "K_Rank", "Avg_Zscore_K", "Average Z score", "Avg Z score", "K", 2, with_counts = FALSE)
Rank_K_3SD_notext <- make_rank_plot(InteractionScores_AdjustMissing, "K_Rank", "Avg_Zscore_K", "Average Z score", "Avg Z score", "K", 3, with_counts = FALSE)

pdf(paste(outputpath,"RankPlots.pdf",sep=""),width = 18, height = 12, onefile = TRUE)

grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2)
grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2)

dev.off()

# --- Linear-model (interaction) Z score rank plots ---------------------------
Rank_L_1SD_lm <- make_rank_plot(InteractionScores_AdjustMissing, "L_Rank_lm", "Z_lm_L", "Interaction Z score", "Int Z score", "L", 1)
Rank_L_2SD_lm <- make_rank_plot(InteractionScores_AdjustMissing, "L_Rank_lm", "Z_lm_L", "Interaction Z score", "Int Z score", "L", 2)
Rank_L_3SD_lm <- make_rank_plot(InteractionScores_AdjustMissing, "L_Rank_lm", "Z_lm_L", "Interaction Z score", "Int Z score", "L", 3)

Rank_K_1SD_lm <- make_rank_plot(InteractionScores_AdjustMissing, "K_Rank_lm", "Z_lm_K", "Interaction Z score", "Int Z score", "K", 1, enhancer_sign = -1)
Rank_K_2SD_lm <- make_rank_plot(InteractionScores_AdjustMissing, "K_Rank_lm", "Z_lm_K", "Interaction Z score", "Int Z score", "K", 2, enhancer_sign = -1)
Rank_K_3SD_lm <- make_rank_plot(InteractionScores_AdjustMissing, "K_Rank_lm", "Z_lm_K", "Interaction Z score", "Int Z score", "K", 3, enhancer_sign = -1)

Rank_L_1SD_notext_lm <- make_rank_plot(InteractionScores_AdjustMissing, "L_Rank_lm", "Z_lm_L", "Interaction Z score", "Int Z score", "L", 1, with_counts = FALSE)
Rank_L_2SD_notext_lm <- make_rank_plot(InteractionScores_AdjustMissing, "L_Rank_lm", "Z_lm_L", "Interaction Z score", "Int Z score", "L", 2, with_counts = FALSE)
Rank_L_3SD_notext_lm <- make_rank_plot(InteractionScores_AdjustMissing, "L_Rank_lm", "Z_lm_L", "Interaction Z score", "Int Z score", "L", 3, with_counts = FALSE)

Rank_K_1SD_notext_lm <- make_rank_plot(InteractionScores_AdjustMissing, "K_Rank_lm", "Z_lm_K", "Interaction Z score", "Int Z score", "K", 1, with_counts = FALSE)
Rank_K_2SD_notext_lm <- make_rank_plot(InteractionScores_AdjustMissing, "K_Rank_lm", "Z_lm_K", "Interaction Z score", "Int Z score", "K", 2, with_counts = FALSE)
Rank_K_3SD_notext_lm <- make_rank_plot(InteractionScores_AdjustMissing, "K_Rank_lm", "Z_lm_K", "Interaction Z score", "Int Z score", "K", 3, with_counts = FALSE)

pdf(paste(outputpath,"RankPlots_lm.pdf",sep=""),width = 18, height = 12, onefile = TRUE)

grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2)
grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2)

dev.off()

# Keep genes that have at least one of the two L scores.
X_NArm <- InteractionScores[!is.na(InteractionScores$Z_lm_L) | !is.na(InteractionScores$Avg_Zscore_L) ,]

#find overlaps
# Classify each gene by whether its lm Z score and its average Z score pass
# the +/-2 threshold. Rules are applied in order; later rules overwrite
# earlier ones. which() drops comparisons that evaluate to NA and is a no-op
# when nothing matches, so no rule is skipped wholesale: previously each rule
# was wrapped in try(), and a single NA in the condition made the whole
# assignment fail silently.
# The "only" rules use strict bounds on the non-significant side so a score
# exactly at the threshold keeps its "Both" label.
# NOTE(review): genes with exactly one of the two scores missing stay
# "No Effect" - confirm that is the intended classification.
X_NArm$Overlap <- "No Effect"
X_NArm$Overlap[which(X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L >= 2)] <- "Deletion Enhancer Both"
X_NArm$Overlap[which(X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L <= -2)] <- "Deletion Suppressor Both"
X_NArm$Overlap[which(X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L < 2)] <- "Deletion Enhancer lm only"
X_NArm$Overlap[which(X_NArm$Z_lm_L < 2 & X_NArm$Avg_Zscore_L >= 2)] <- "Deletion Enhancer Avg Zscore only"
X_NArm$Overlap[which(X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L > -2)] <- "Deletion Suppressor lm only"
X_NArm$Overlap[which(X_NArm$Z_lm_L > -2 & X_NArm$Avg_Zscore_L <= -2)] <- "Deletion Suppressor Avg Zscore only"
X_NArm$Overlap[which(X_NArm$Z_lm_L >= 2 & X_NArm$Avg_Zscore_L <= -2)] <- "Deletion Enhancer lm, Deletion Suppressor Avg Z score"
X_NArm$Overlap[which(X_NArm$Z_lm_L <= -2 & X_NArm$Avg_Zscore_L >= 2)] <- "Deletion Suppressor lm, Deletion Enhancer Avg Z score"

#get the linear model info and the r-squared value for all CPPs in results 1 vs results 2
get_lm_L <- lm(X_NArm$Z_lm_L~X_NArm$Avg_Zscore_L)
L_lm <- summary(get_lm_L)

get_lm_K <- lm(X_NArm$Z_lm_K~X_NArm$Avg_Zscore_K)
+ K_lm <- summary(get_lm_K) + + get_lm_r <- lm(X_NArm$Z_lm_r~X_NArm$Avg_Zscore_r) + r_lm <- summary(get_lm_r) + + get_lm_AUC <- lm(X_NArm$Z_lm_AUC~X_NArm$Avg_Zscore_AUC) + AUC_lm <- summary(get_lm_AUC) + + pdf(paste(outputpath,"Avg_Zscore_vs_lm_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) + + print(ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Avg Zscore vs lm L") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(Avg_Zscore_K,Z_lm_K)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Avg Zscore vs lm K") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(K_lm$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(Avg_Zscore_r,Z_lm_r)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Avg Zscore vs lm r") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(r_lm$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(Avg_Zscore_AUC,Z_lm_AUC)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Avg Zscore vs lm AUC") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(AUC_lm$r.squared,2))) + theme_Publication_legend_right()) + + dev.off() + + + lm_v_Zscore_L <- 
ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L,ORF=OrfRep,Gene=Gene,NG=NG,SM=SM,DB=DB)) + geom_point(aes(color=Overlap),shape=3) + + geom_smooth(method = "lm",color=1) + ggtitle("Avg Zscore vs lm L") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right() + + pgg <- ggplotly(lm_v_Zscore_L) + #pgg + plotly_path <- paste(getwd(),"/",outputpath,"Avg_Zscore_vs_lm_NA_rm.html",sep="") + saveWidget(pgg, file=plotly_path, selfcontained =TRUE) + + X_NArm$L_Rank <- rank(X_NArm$Avg_Zscore_L) + X_NArm$K_Rank <- rank(X_NArm$Avg_Zscore_K) + X_NArm$r_Rank <- rank(X_NArm$Avg_Zscore_r) + X_NArm$AUC_Rank <- rank(X_NArm$Avg_Zscore_AUC) + + X_NArm$L_Rank_lm <- rank(X_NArm$Z_lm_L) + X_NArm$K_Rank_lm <- rank(X_NArm$Z_lm_K) + X_NArm$r_Rank_lm <- rank(X_NArm$Z_lm_r) + X_NArm$AUC_Rank_lm <- rank(X_NArm$Z_lm_AUC) + + #get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 + get_lm_L2 <- lm(X_NArm$L_Rank_lm~X_NArm$L_Rank) + L_lm2 <- summary(get_lm_L2) + + get_lm_K2 <- lm(X_NArm$K_Rank_lm~X_NArm$K_Rank) + K_lm2 <- summary(get_lm_K2) + + get_lm_r2 <- lm(X_NArm$r_Rank_lm~X_NArm$r_Rank) + r_lm2 <- summary(get_lm_r2) + + get_lm_AUC2 <- lm(X_NArm$AUC_Rank_lm~X_NArm$AUC_Rank) + AUC_lm2 <- summary(get_lm_AUC2) + + num_genes_NArm2 <- (dim(X_NArm)[1])/2 + + pdf(paste(outputpath,"Avg_Zscore_vs_lm_ranked_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) + + print(ggplot(X_NArm,aes(L_Rank,L_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Rank Avg Zscore vs lm L") + + annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(L_lm2$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(K_Rank,K_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + 
ggtitle("Rank Avg Zscore vs lm K") + + annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(K_lm2$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(r_Rank,r_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Rank Avg Zscore vs lm r") + + annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(r_lm2$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(AUC_Rank,AUC_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Rank of Avg Zscore vs lm AUC") + + annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(AUC_lm2$r.squared,2))) + theme_Publication_legend_right()) + + + + dev.off() + + + + Rank_L_1SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 1,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -1,])[1])) + + theme_Publication() + + Rank_L_2SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. 
Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 2,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -2,])[1])) + + theme_Publication() + + Rank_L_3SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 3,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -3,])[1])) + + theme_Publication() + + + Rank_K_1SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. 
Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -1,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 1,])[1])) + + theme_Publication() + + Rank_K_2SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -2,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 2,])[1])) + + theme_Publication() + + Rank_K_3SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -3,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 3,])[1])) + + theme_Publication() + + + Rank_L_1SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_L_2SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_L_3SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. 
Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + + Rank_K_1SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_K_2SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_K_3SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + + pdf(paste(outputpath,"RankPlots_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) + + grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2) + grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2) + + dev.off() + + + Rank_L_1SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 1,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -1,])[1])) + + theme_Publication() + + Rank_L_2SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. 
Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 2,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -2,])[1])) + + theme_Publication() + + Rank_L_3SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 3,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -3,])[1])) + + theme_Publication() + + + Rank_K_1SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. 
Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -1,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 1,])[1])) + + theme_Publication() + + Rank_K_2SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -2,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 2,])[1])) + + theme_Publication() + + Rank_K_3SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -3,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 3,])[1])) + + theme_Publication() + + + Rank_L_1SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_L_2SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_L_3SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. 
Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + + Rank_K_1SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_K_2SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_K_3SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + pdf(paste(outputpath,"RankPlots_lm_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) + + grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2) + grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2) + + dev.off() + +} + + +
+#Pairwise agreement between the interaction z-scores (Z_lm_*) of the four CPPs (L, K, r, AUC):
+#fit one linear model per pair and keep its summary for the R-squared annotation on the plots.
+get_lm_1 <- lm(X_NArm$Z_lm_K~X_NArm$Z_lm_L)
+L_lm_1 <- summary(get_lm_1)
+
+get_lm_2 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_L)
+L_lm_2 <- summary(get_lm_2)
+
+get_lm_3 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_L)
+L_lm_3 <- summary(get_lm_3)
+
+get_lm_4 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_K)
+L_lm_4 <- summary(get_lm_4)
+
+get_lm_5 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_K)
+L_lm_5 <- summary(get_lm_5)
+
+get_lm_6 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_r)
+L_lm_6 <- summary(get_lm_6)
+
+#One entry per plotted pair: x/y are the CPP suffixes of the Z_lm_* columns,
+#fit is the lm summary whose R-squared is printed on the plot.
+cpp_pairs <- list(
+  list(x = "L", y = "K",   fit = L_lm_1),
+  list(x = "L", y = "r",   fit = L_lm_2),
+  list(x = "L", y = "AUC", fit = L_lm_3),
+  list(x = "K", y = "r",   fit = L_lm_4),
+  list(x = "K", y = "AUC", fit = L_lm_5),
+  list(x = "r", y = "AUC", fit = L_lm_6)
+)
+
+#Build the scatter plot of X_NArm for one CPP pair.
+# pair      - one element of cpp_pairs
+# highlight - optional data frame with the same Z_lm_* columns; when supplied its points are
+#             drawn in cyan on top of the gray cloud, otherwise a linear-model smooth is drawn.
+#Returns the ggplot object; the caller is responsible for print()ing it.
+plot_cpp_correlation <- function(pair, highlight = NULL) {
+  x_col <- paste0("Z_lm_", pair$x)
+  y_col <- paste0("Z_lm_", pair$y)
+  #.data[[...]] lets aes() take the column names as strings (needs ggplot2 >= 3.0.0)
+  p <- ggplot(X_NArm, aes(.data[[x_col]], .data[[y_col]])) +
+    geom_point(shape=3,color="gray70")
+  if (is.null(highlight)) {
+    p <- p + geom_smooth(method="lm",color="tomato3")
+  } else {
+    p <- p + geom_point(data=highlight, aes(.data[[x_col]], .data[[y_col]]), color="cyan")
+  }
+  p +
+    ggtitle(paste0("Interaction ", pair$x, " vs. Interaction ", pair$y)) +
+    xlab(paste0("z-score ", pair$x)) + ylab(paste0("z-score ", pair$y)) +
+    annotate("text",x=0,y=0,label = paste("R-squared = ",round(pair$fit$r.squared,3))) +
+    theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18))
+}
+
+pdf(file=paste(outputpath,"Correlation_CPPs.pdf",sep=""),width = 10, height = 7, onefile = TRUE)
+
+#Pages 1-6: every CPP pair with its linear fit.
+#print() is explicit so the pages are also written when the script is source()d
+#or this code is moved inside a block, not only under top-level auto-printing.
+for (pair in cpp_pairs) {
+  print(plot_cpp_correlation(pair))
+}
+
+#Pages 7-12: the same pairs with the InteractionScores_RF rows that have a Z_lm_L value highlighted
+InteractionScores_RF2 <- InteractionScores_RF[!is.na(InteractionScores_RF$Z_lm_L),]
+for (pair in cpp_pairs) {
+  print(plot_cpp_correlation(pair, highlight = InteractionScores_RF2))
+}
+
+dev.off()
+
+
+#write.csv(Labels,file=paste("../Code/Parameters.csv"),row.names = FALSE)
+timestamp()
+
+#BoneYard***********************************************
+#I'm thinking this parameter needs to be saved somewhere "permanent" for the record so outputs can be reproduced.
+#take this out of the Arguments. In Matlab I could for future in .mat file.
Maybe I could save the SD Args[2] as part of the StudyInfo.txt. +#Corruptable but better than nothing. +#if(is.na(Args[2])){ +# std=3 +#}else { +# std= Arg[2] +#Delta_Background_sdFactor <- 2 #Args[3] +#DelBGFactr <- as.numeric(Delta_Background_sdFactor) +#} + diff --git a/workflow/.old/templates/qhtcp/Exp3/backups/InteractTemplateB4Prompt4SDinput.R b/workflow/.old/templates/qhtcp/Exp3/backups/InteractTemplateB4Prompt4SDinput.R new file mode 100644 index 00000000..3b11777c --- /dev/null +++ b/workflow/.old/templates/qhtcp/Exp3/backups/InteractTemplateB4Prompt4SDinput.R @@ -0,0 +1,2702 @@ +#Based on InteractionTemplate.R which is based on Sean Santose's Interaction_V5 script. +#Adapt SS For Structured Data storage but using command line scripts +###Set up the required libraries, call required plot theme elements and set up the command line arguments +library("ggplot2") +library("plyr") +library("extrafont") +library("gridExtra") +library("gplots") +library("RColorBrewer") +library("stringr") +#library("gdata") +library(plotly) +library(htmlwidgets) + +Args <- commandArgs(TRUE) +input_file <- Args[1] #"!!Results_17_0827_yor1null-rpl12anull misLabeledAsFrom MI 17_0919_yor1-curated.txt" #Args[1] #Arg 1 #"!!ResultsStd_JS 19_1224_HLEG_P53.txt" is the !!results ... 
.txt + +#Path to Output Directory +W=getwd() #R is F'd up, Can't use, Any legitamate platfold could build out dirs from this +outDir <- "ZScores/" #"Args[2] #paste0(W,"/ZScores/") +subDir <- outDir #Args[2] + +if (file.exists(subDir)){ + outputpath <- subDir +} else { + dir.create(file.path(subDir)) +} + +if (file.exists(paste(subDir,"QC/",sep=""))){ + outputpath_QC <- paste(subDir,"QC/",sep="") +} else { + dir.create(file.path(paste(subDir,"QC/",sep=""))) + outputpath_QC <- paste(subDir,"QC/",sep="") +} +#define the output path (formerly the second argument from Rscript) +outputpath <- outDir + +#Set Args[2] the Background contamination noise filter as a function of standard deviation +#Sean recommends 3 or 5 SD factor. +#Capture Exp_ number,use it to Save Args[2]{std}to Labels field and then Write to Labels to studyInfo.txt for future reference +Labels <- read.csv(file= "../Code/StudyInfo.csv",stringsAsFactors = FALSE) #,sep= ",") +print("Be sure to include Argument 2 the Bacground noise filter standard deviation i.e., 3 or 5 per Sean") +std= as.numeric(Args[2]) +expNumber<- as.numeric(sub("^.*?(\\d+)$", "\\1", getwd())) +Labels[expNumber,3]= as.numeric(std) +Delta_Background_sdFactor <- std +DelBGFactr <- as.numeric(Delta_Background_sdFactor) +#Write Background SD value to studyInfo.txt file +#write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) +write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) +print('ln 50 write StudyInfo.csv ') +#write.table(Labels,file=paste(outputpath,"StudyInfo.txt"),sep = "\t",row.names = FALSE) + +#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +#++++++BEGIN USER DATA SELECTION SECTION+++++++++++++++++++++++++++++++++++++++++++++++++ + + +#read in the data +X <- read.delim(input_file,skip=2,as.is=T,row.names=1,strip.white=TRUE) +X <- X[!(X[[1]]%in%c("","Scan")),] +#X <- X[!(X[[1]]%in%c(61:76)),] #Remove dAmp plates which are Scans 61 thru 76 + +#X <- 
X[which(X$Specifics == "WT"),]
+
+#Keep the first 46 columns plus the last three columns of the results sheet
+X_length <- ncol(X)
+X_end <- X_length - 2
+X <- X[,c(1:46,X_end:X_length)]
+
+
+#use numeric data to perform operations
+X$Col <- as.numeric(X$Col)
+X$Row <- as.numeric(X$Row)
+X$l <- as.numeric(X$l)
+X$K <- as.numeric(X$K)
+X$r <- as.numeric(X$r)
+X$Scan <- as.numeric(X$Scan)
+X$AUC <- as.numeric(X$AUC)
+X$LstBackgrd <- as.numeric(X$LstBackgrd)
+X$X1stBackgrd <- as.numeric(X$X1stBackgrd)
+
+#set the OrfRep to YDL227C for the ref data
+#Assign through the column with which(): unlike X[cond,]$OrfRep <- ..., this is a no-op
+#(not an error) when no row matches, and NA values in ORF are skipped.
+X$OrfRep[which(X$ORF == "YDL227C")] <- "YDL227C"
+#Sean removes the Doxycyclin at 0.0ug.mL so that only the Oligomycin series with Doxycyclin of 0.12ug/mL are used.
+#That is, the first DM plates are removed from the data set with the following.
+X <- X[X$Conc1 != "0ug/mL",] #This occurs only for Exp1 and Exp2 and so doesn't have any effect on Exp3&4
+
+
+#Mert placed the "bad_spot" text in the ORF col. for particular spots in the RF1 and RF2 plates.
+#This code removes those spots from the data set used for the interaction analysis. Dr. Hartman feels that these do not affect Zscores significantly and so "non-curated" files were used.
+#try(X <- X[X$ORF != "bad_spot",])
+#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++=
+
+#get total number of drug concentrations
+Total_Conc_Nums <- length(unique(X$Conc))
+
+#function to ID numbers in string with characters+numbers (ie to get numeric drug conc)
+#Returns the first match of: optional minus sign(s), digits, optional dot(s), optional digits.
+numextract <- function(string){
+  str_extract(string, "\\-*\\d+\\.*\\d*")
+}
+
+#generate a new column with the numeric drug concs
+X$Conc_Num <- as.numeric(numextract(X$Conc))
+#Generate new column with the numeric drug concs as factors starting at 0 for the graphing later
+#(factor levels are ordered by value, so the lowest concentration becomes 0)
+X$Conc_Num_Factor <- as.numeric(as.factor(X$Conc_Num)) - 1
+
+#Get the max factor for concentration
+MAX_CONC <- max(X$Conc_Num_Factor)
+#if treating numbers not as factors uncomment next line and comment out previous line
+#MAX_CONC <- max(X$Conc_Num)
+
+#remove wells with problems for making graphs and to not include in summary statistics
+#The filters run one after another; each keeps the rows whose Gene (or ORF) differs from the given blank label.
+X <- X[X$Gene != "BLANK",]
+X <- X[X$Gene != "Blank",]
+X <- X[X$ORF != "Blank",]
+X <- X[X$Gene != "blank",]
+#X <- X[X$Gene != "HO",]
+#Snapshot of X before the gene-name rewrite that follows (used by the commented-out restore line)
+Xbu= X
+#Inserted to use SGDgenelist to update orfs and replace empty geneName cells with ORF name (adapted from Sean's Merge script). This is to 'fix' the naming for everything that follows (REMc, Heatmaps ... et.al) rather than do it piecemeal later
+#Sean's Match Script (which was adapted here) was fixed 2022_0608 so as not to write over the RF1&RF2 geneNames which caused a variance with his code results
+#in the Z_lm_L,K,r&AUC output values. Values correlated well but were off by a multiplier factor.
+SGDgeneList= "../Code/SGD_features.tab"
+#Only columns 4 and 5 of the SGD table are read; they become genes[,1] and genes[,2]
+genes = data.frame(read.delim(file=SGDgeneList,quote="",header=FALSE,colClasses = c(rep("NULL",3), rep("character", 2), rep("NULL", 11))))
+#Vectorized form of the former row-by-row loop (same per-row rules, no repeated data-frame writes):
+# 1) for every row that is not the YDL227C reference, column 15 takes genes[,2] of the row whose
+#    genes[,1] matches column 14 (unmatched entries fall back to row 1 of genes, as before)
+# 2) where column 15 is then empty or "OCT1", it takes the OrfRep value
+#which() skips NA comparisons instead of stopping on them.
+OrfRepColNum= as.numeric(match('OrfRep',names(X)))
+line_num = match(X[,14],genes[,1],nomatch=1)
+notRefRows = which(X[,OrfRepColNum]!= "YDL227C")
+X[notRefRows,15] = genes[line_num[notRefRows],2]
+blankNameRows = which((X[,15] == "")|(X[,15] == "OCT1"))
+X[blankNameRows,15] = X[blankNameRows,OrfRepColNum]
+Xblankreplace= X
+#X= Xbu #for restore testing restore X if geneName 'Match' routine needs changing
+
+#Remove dAmPs *******************************
+DAmPs_List <- "../Code/22_0602_Remy_DAmPsList.txt"
+Damps <- read.delim(DAmPs_List,header=FALSE)
+
+#first column of the headerless DAmPs list holds the ORFs to drop
+X <- X[!(X$ORF %in% Damps[,1]),]
+XafterDampsRM=X #Backup for debugging
+# ***********
+
+
+#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+#++++++END USER DATA SELECTION+++++++++++++++++++++++++++++++++++++++++++++++++
+#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+print("ln137 End of User Section including blank gene writeOver")
+#++++Begin Graphics Boiler Plate Section+++++++++++++++++++++++++++++++++++++++
+#theme elements for plots
+#Publication theme with a horizontal legend at the bottom of the plot.
+theme_Publication <- function(base_size=14, base_family="sans") {
+  library(grid)
+  library(ggthemes)
+  (theme_foundation(base_size=base_size, base_family=base_family)
+    + theme(plot.title = element_text(face = "bold",
+                                      size = rel(1.2), hjust = 0.5),
+            text = element_text(),
+            panel.background = element_rect(colour = NA),
+            plot.background = element_rect(colour = NA),
+            panel.border = element_rect(colour = NA),
+            axis.title = element_text(face = "bold",size = rel(1)),
+            axis.title.y = element_text(angle=90,vjust =2),
+            axis.title.x = element_text(vjust = -0.2),
+            axis.text = element_text(),
+            axis.line = element_line(colour="black"),
+            axis.ticks = element_line(),
+            panel.grid.major = element_line(colour="#f0f0f0"),
+            panel.grid.minor = element_blank(),
+            legend.key = element_rect(colour = NA),
+            legend.position = "bottom",
+            legend.direction = "horizontal",
+            legend.key.size= unit(0.2, "cm"),
+            legend.spacing = unit(0, "cm"),
+            legend.title = element_text(face="italic"),
+            plot.margin=unit(c(10,5,5,5),"mm"),
+            strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
+            strip.text = element_text(face="bold")
+    ))
+
+}
+
+#Discrete fill scale using the fixed nine-colour "Publication" palette.
+#Each scale helper is defined once; manual_pal is namespace-qualified so the helper
+#works without the scales package being attached.
+scale_fill_Publication <- function(...){
+  discrete_scale("fill","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
+
+}
+
+#Discrete colour scale using the same palette.
+scale_colour_Publication <- function(...){
+  discrete_scale("colour","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
+
+}
+
+
+#Publication theme with a vertical legend on the right of the plot.
+#Attaches grid/ggthemes itself so it no longer depends on theme_Publication() having run first.
+theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
+  library(grid)
+  library(ggthemes)
+  (theme_foundation(base_size=base_size, base_family=base_family)
+    + theme(plot.title = element_text(face = "bold",
+                                      size = rel(1.2), hjust = 0.5),
+            text = element_text(),
+            panel.background = element_rect(colour = NA),
+            plot.background = element_rect(colour = NA),
+            panel.border = element_rect(colour = NA),
+            axis.title = element_text(face = "bold",size = rel(1)),
+            axis.title.y = element_text(angle=90,vjust =2),
+            axis.title.x = element_text(vjust = -0.2),
+            axis.text = element_text(),
+            axis.line = element_line(colour="black"),
+            axis.ticks = element_line(),
+            panel.grid.major = element_line(colour="#f0f0f0"),
+            panel.grid.minor = element_blank(),
+            legend.key = element_rect(colour = NA),
+            legend.position = "right",
+            legend.direction = "vertical",
+            legend.key.size= unit(0.5, "cm"),
+            legend.spacing = unit(0, "cm"),
+            legend.title = element_text(face="italic"),
+            plot.margin=unit(c(10,5,5,5),"mm"),
+            strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
+            strip.text = element_text(face="bold")
+    ))
+
+}
+
+
+#print timestamp for initial time the code starts
+timestamp()
+#+++++BEGIN QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++
+###Part 2 - Quality control
+#print quality control graphs for each dataset before removing data due to contamination
+#and before adjusting missing data to max theoretical values
+
+#plate analysis plot
+#plate analysis is a quality check to identify plate effects containing anomalies
+#Each plot shows the raw points per Scan, a mean +/- 1 SD error bar and the mean point, coloured by drug concentration
+
+Plate_Analysis_L <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
+  stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
+               geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
+  ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication()
+
+Plate_Analysis_K <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
+  stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
+               geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
+  ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication()
+
+Plate_Analysis_r <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
+  stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
+               geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
+  ggtitle("Plate analysis by Drug Conc for r before 
quality control") + theme_Publication()
+
+Plate_Analysis_AUC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
+  stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
+               geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
+  ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication()
+
+
+
+#Box-plot versions of the same before-QC plate analysis (Scan treated as a factor on the x axis)
+Plate_Analysis_L_Box <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication()
+
+Plate_Analysis_K_Box <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication()
+
+Plate_Analysis_r_Box <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_Publication()
+
+Plate_Analysis_AUC_Box <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication()
+
+#quality control - values with a high delta background likely have heavy contamination
+#check the frequency of these values
+#report the L and K values of these spots
+#report the number to be removed based on the Delta_Background_Tolerance
+#Delta background = last background reading minus first background reading
+X$Delta_Backgrd <- X$LstBackgrd - X$X1stBackgrd
+
+
+#raw l vs K before QC
+#ORF, Gene and Delta_Backgrd are passed as extra aesthetics on the point layer
+Raw_l_vs_K_beforeQC <- ggplot(X,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) +
+  ggtitle("Raw L vs K before QC") +
+  theme_Publication_legend_right()
+#static PDF copy; the bare plot name relies on top-level auto-printing to draw into the device
+pdf(paste(outputpath_QC,"Raw_L_vs_K_beforeQC.pdf",sep=""),width = 12,height = 8)
+Raw_l_vs_K_beforeQC
+dev.off()
+#interactive HTML copy of the same plot
+pgg <- ggplotly(Raw_l_vs_K_beforeQC)
+#pgg
+plotly_path <- 
paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_beforeQC.html",sep="")
+saveWidget(pgg, file=plotly_path, selfcontained =TRUE)
+
+
+#set delta background tolerance to the mean delta background plus DelBGFactr standard deviations
+#NOTE(review): mean() and sd() are called without na.rm, so any NA in Delta_Backgrd makes the tolerance NA - confirm inputs are complete
+Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(DelBGFactr*sd(X$Delta_Backgrd))
+#Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(3*sd(X$Delta_Backgrd))
+print(paste("Delta_Background_Tolerance is",Delta_Background_Tolerance,sep=" "))
+
+#plate analysis of the delta background before QC (jittered points, mean +/- 1 SD, mean point)
+Plate_Analysis_Delta_Backgrd <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2,position="jitter") +
+  stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
+               geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
+  ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication()
+
+Plate_Analysis_Delta_Backgrd_Box <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication()
+
+
+
+#spots whose delta background is at or above the tolerance
+X_Delta_Backgrd_above_Tolerance <- X[X$Delta_Backgrd >= Delta_Background_Tolerance,]
+
+#half of the median K and l of those spots give the x/y position of the annotation text below
+X_Delta_Backgrd_above_Tolerance_K_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$K,na.rm = TRUE))/2
+X_Delta_Backgrd_above_Tolerance_L_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$l,na.rm = TRUE))/2
+#number of spots at or above the tolerance
+X_Delta_Backgrd_above_Tolerance_toRemove <- dim(X_Delta_Backgrd_above_Tolerance)[1]
+
+X_Delta_Backgrd_above_Tolerance_L_vs_K <- ggplot(X_Delta_Backgrd_above_Tolerance,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) +
+  ggtitle(paste("Raw L vs K for strains above delta background threshold of",Delta_Background_Tolerance,"or above")) +
+  annotate("text",x=X_Delta_Backgrd_above_Tolerance_L_halfmedian,y=X_Delta_Backgrd_above_Tolerance_K_halfmedian,
+           label = paste("Strains above delta background 
tolerance = ",X_Delta_Backgrd_above_Tolerance_toRemove)) +
+  theme_Publication_legend_right()
+#explicit print() so the plot is drawn into the PDF even when the script is not run at top level
+pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.pdf",sep=""),width = 12,height = 8)
+print(X_Delta_Backgrd_above_Tolerance_L_vs_K)
+dev.off()
+pgg <- ggplotly(X_Delta_Backgrd_above_Tolerance_L_vs_K)
+#pgg
+plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.html",sep="")
+saveWidget(pgg, file=plotly_path, selfcontained =TRUE)
+
+#frequency plot for all data vs. the delta_background
+DeltaBackground_Frequency_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_density() +
+  ggtitle("Density plot for Delta Background by Conc All Data") + theme_Publication_legend_right()
+
+#bar plot for all data vs. the delta_background
+DeltaBackground_Bar_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_bar() +
+  ggtitle("Bar plot for Delta Background by Conc All Data") + theme_Publication_legend_right()
+
+pdf(file = paste(outputpath_QC,"Frequency_Delta_Background.pdf",sep=""),width = 12,height = 8)
+print(DeltaBackground_Frequency_Plot)
+print(DeltaBackground_Bar_Plot)
+dev.off()
+
+
+#Need to identify missing data, and differentiate between this data and removed data so the removed data can get set to NA and the missing data can get set to max theoretical values
+#1 for missing data, 0 for non missing data
+#Use "NG" for NoGrowth rather than "missing"
+#The flag is set through the column with which(): no rows matching is a silent no-op and
+#NA comparisons are skipped, instead of an error being swallowed by try().
+X$NG <- 0
+X$NG[which(X$l == 0)] <- 1
+
+#1 for removed data, 0 non removed data
+#Use DB to identify number of genes removed due to the DeltaBackground Threshold rather than "Removed"
+X$DB <- 0
+X$DB[which(X$Delta_Backgrd >= Delta_Background_Tolerance)] <- 1
+
+#replace the CPPs for l, r, AUC and K (must be last!) 
for removed data
+#The rows at or above the tolerance are located once; which() drops NA comparisons so a single
+#NA can no longer make the whole replacement fail silently inside try().
+DB_rows <- which(X$Delta_Backgrd >= Delta_Background_Tolerance)
+X$l[DB_rows] <- NA
+X$r[DB_rows] <- NA
+X$AUC[DB_rows] <- NA
+X$K[DB_rows] <- NA
+
+
+#plate analysis after QC: same layout as the before-QC plots, drawn from X with removed spots set to NA
+Plate_Analysis_L_afterQC <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
+  stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
+               geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
+  ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication()
+
+Plate_Analysis_K_afterQC <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
+  stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
+               geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
+  ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication()
+
+Plate_Analysis_r_afterQC <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
+  stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
+               geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
+  ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication()
+
+Plate_Analysis_AUC_afterQC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
+  stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
+               geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
+  ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication()
+
+Plate_Analysis_Delta_Backgrd_afterQC <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
+  stat_summary(fun.y = 
mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
+               geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
+  ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication()
+
+
+#Box-plot versions of the after-QC plate analysis (Scan treated as a factor on the x axis)
+Plate_Analysis_L_Box_afterQC <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication()
+
+Plate_Analysis_K_Box_afterQC <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication()
+
+Plate_Analysis_r_Box_afterQC <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication()
+
+Plate_Analysis_AUC_Box_afterQC <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication()
+
+Plate_Analysis_Delta_Backgrd_Box_afterQC <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication()
+
+#print the plate analysis data before and after QC
+#Before/after pages alternate per parameter; each bare plot name relies on top-level auto-printing
+pdf(file=paste(outputpath_QC,"Plate_Analysis.pdf",sep=""),width = 14,height=9)
+Plate_Analysis_L
+Plate_Analysis_L_afterQC
+Plate_Analysis_K
+Plate_Analysis_K_afterQC
+Plate_Analysis_r
+Plate_Analysis_r_afterQC
+Plate_Analysis_AUC
+Plate_Analysis_AUC_afterQC
+Plate_Analysis_Delta_Backgrd
+Plate_Analysis_Delta_Backgrd_afterQC
+dev.off()
+
+#print the box-plot plate analysis before and after QC
+pdf(file=paste(outputpath_QC,"Plate_Analysis_Boxplots.pdf",sep=""),width = 18,height=9)
+Plate_Analysis_L_Box
+Plate_Analysis_L_Box_afterQC
+Plate_Analysis_K_Box
+Plate_Analysis_K_Box_afterQC
+Plate_Analysis_r_Box
+Plate_Analysis_r_Box_afterQC
+Plate_Analysis_AUC_Box
+Plate_Analysis_AUC_Box_afterQC
+Plate_Analysis_Delta_Backgrd_Box
+Plate_Analysis_Delta_Backgrd_Box_afterQC
+dev.off()
+
+#remove the zero values and print plate analysis
+#X_noZero keeps only rows with l > 0; which() also drops rows where l is NA
+X_noZero <- X[which(X$l > 0),]
+Plate_Analysis_L_afterQC_Z <- ggplot(X_noZero,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
+  stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
+               geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
+  ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication()
+
+Plate_Analysis_K_afterQC_Z <- ggplot(X_noZero,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
+  stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
+               geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
+  ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication()
+
+Plate_Analysis_r_afterQC_Z <- ggplot(X_noZero,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
+  stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
+               geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
+  ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication()
+
+Plate_Analysis_AUC_afterQC_Z <- ggplot(X_noZero,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
+  stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
+               geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
+  ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication()
+
+Plate_Analysis_Delta_Backgrd_afterQC_Z <- ggplot(X_noZero,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
+  stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
+               geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) +
+  ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication()
+
+
+#Box-plot versions of the no-zero plate analysis
+Plate_Analysis_L_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication()
+
+Plate_Analysis_K_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication()
+
+Plate_Analysis_r_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication()
+
+Plate_Analysis_AUC_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication()
+
+Plate_Analysis_Delta_Backgrd_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() +
+  ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication()
+
+#print the no-zero plate analysis after QC
+pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros.pdf",sep=""),width = 14,height=9)
+Plate_Analysis_L_afterQC_Z
+Plate_Analysis_K_afterQC_Z
+Plate_Analysis_r_afterQC_Z
+Plate_Analysis_AUC_afterQC_Z
+Plate_Analysis_Delta_Backgrd_afterQC_Z
+dev.off()
+
+#print the no-zero box plots after QC
+pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros_Boxplots.pdf",sep=""),width = 18,height=9)
+Plate_Analysis_L_Box_afterQC_Z
+Plate_Analysis_K_Box_afterQC_Z
+Plate_Analysis_r_Box_afterQC_Z
+Plate_Analysis_AUC_Box_afterQC_Z
+Plate_Analysis_Delta_Backgrd_Box_afterQC_Z
+dev.off()
+
+#remove dataset with zeros removed
+rm(X_noZero)
+
+
+#X_test_missing_and_removed <- X[X$Removed == 1,]
+
+#calculate summary statistics for all strains, including both background and the deletions
+#One row per drug concentration. N is the row count of the group (NA values included), while
+#mean/median/max/min/sd ignore NA; se is computed as sd / sqrt(N-1).
+X_stats_ALL <- ddply(X, c("Conc_Num","Conc_Num_Factor"), summarise,
+                     N = (length(l)),
+                     mean_L = mean(l,na.rm=TRUE),
+                     median_L = median(l,na.rm=TRUE),
+                     max_L = max(l,na.rm=TRUE),
+                     min_L = min(l,na.rm=TRUE),
+                     sd_L = sd(l,na.rm=TRUE),
+                     se_L = sd_L / sqrt(N-1),
+                     mean_K = mean(K,na.rm=TRUE),
+                     median_K = median(K,na.rm=TRUE),
+                     max_K = max(K,na.rm=TRUE),
+                     min_K = min(K,na.rm=TRUE),
+                     sd_K = sd(K,na.rm=TRUE),
+                     se_K = sd_K / sqrt(N-1),
+                     mean_r = mean(r,na.rm=TRUE),
+                     median_r = median(r,na.rm=TRUE),
+                     max_r = max(r,na.rm=TRUE),
+                     min_r = min(r,na.rm=TRUE),
+                     sd_r = sd(r,na.rm=TRUE),
+                     se_r = sd_r / sqrt(N-1),
+                     mean_AUC = mean(AUC,na.rm=TRUE),
+                     median_AUC = median(AUC,na.rm=TRUE),
+                     max_AUC = max(AUC,na.rm=TRUE),
+                     min_AUC = min(AUC,na.rm=TRUE),
+                     sd_AUC = sd(AUC,na.rm=TRUE),
+                     se_AUC = sd_AUC / sqrt(N-1)
+)
+#print(X_stats_ALL_L)
+
+#NOTE(review): paste() is called without sep="", so a space is inserted between outputpath and the
+#file name (the file name starts with a blank) - confirm whether downstream readers expect that.
+write.csv(X_stats_ALL,file=paste(outputpath,"SummaryStats_ALLSTRAINS.csv"),row.names = FALSE)
+#+++++END QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+##### Part 3 - Generate summary statistics and calculate the max theoretical L value
+##### Calculate the Z score at each drug conc for each deletion strain
+
+
+#get the background strains - can be modified to take another argument but for most screens will just be YDL227C
+Background_Strains <- c("YDL227C")
+
+#first part of loop will go through for each background strain
+#most cases there will only be one YDL227C
+for(s in Background_Strains){
+  #rows of the current background (reference) strain
+  X_Background <- X[X$OrfRep == s,]
+
+  #if there's missing data for the background strains set these values to NA so the 0 values aren't included in summary statistics
+  #we may want to consider in some cases giving the max high value to L depending on the data 
type
+  #Set zero (no-growth) values of the background strain to NA. The previous test,
+  #table(X_Background$l)[1] == 0, compared the COUNT of the smallest l value with 0, which is
+  #never true for observed values, so this step never ran. which() keeps NA comparisons out of the index.
+  if(any(X_Background$l == 0,na.rm=TRUE)){
+    X_Background$l[which(X_Background$l == 0)] <- NA
+    X_Background$K[which(X_Background$K == 0)] <- NA
+    X_Background$r[which(X_Background$r == 0)] <- NA
+    X_Background$AUC[which(X_Background$AUC == 0)] <- NA
+  }
+
+  #drop background rows without an l value
+  X_Background <- X_Background[!is.na(X_Background$l),]
+
+  #get summary stats for L, K, R, AUC
+  #One row per OrfRep and drug concentration; se is computed as sd / sqrt(N-1)
+  X_stats_BY_L <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise,
+                        N = (length(l)),
+                        mean = mean(l,na.rm=TRUE),
+                        median = median(l,na.rm=TRUE),
+                        max = max(l,na.rm=TRUE),
+                        min = min(l,na.rm=TRUE),
+                        sd = sd(l,na.rm=TRUE),
+                        se = sd / sqrt(N-1)
+  )
+  print(X_stats_BY_L)
+  #largest per-concentration SD of l
+  X1_SD <- max(X_stats_BY_L$sd)
+
+  X_stats_BY_K <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise,
+                        N = (length(K)),
+                        mean = mean(K,na.rm=TRUE),
+                        median = median(K,na.rm=TRUE),
+                        max = max(K,na.rm=TRUE),
+                        min = min(K,na.rm=TRUE),
+                        sd = sd(K,na.rm=TRUE),
+                        se = sd / sqrt(N-1)
+  )
+
+  X1_SD_K <- max(X_stats_BY_K$sd)
+
+
+  X_stats_BY_r <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise,
+                        N = length(r),
+                        mean = mean(r,na.rm=TRUE),
+                        median = median(r,na.rm=TRUE),
+                        max = max(r,na.rm=TRUE),
+                        min = min(r,na.rm=TRUE),
+                        sd = sd(r,na.rm=TRUE),
+                        se = sd / sqrt(N-1)
+  )
+
+  X1_SD_r <- max(X_stats_BY_r$sd)
+
+  X_stats_BY_AUC <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise,
+                          N = length(AUC),
+                          mean = mean(AUC,na.rm=TRUE),
+                          median = median(AUC,na.rm=TRUE),
+                          max = max(AUC,na.rm=TRUE),
+                          min = min(AUC,na.rm=TRUE),
+                          sd = sd(AUC,na.rm=TRUE),
+                          se = sd / sqrt(N-1)
+  )
+
+  X1_SD_AUC <- max(X_stats_BY_AUC$sd)
+
+  #combined table of all four parameters for the background strain
+  X_stats_BY <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise,
+                      N = (length(l)),
+                      mean_L = mean(l,na.rm=TRUE),
+                      median_L = median(l,na.rm=TRUE),
+                      max_L = max(l,na.rm=TRUE),
+                      min_L = min(l,na.rm=TRUE),
+                      sd_L = sd(l,na.rm=TRUE),
+                      se_L = sd_L / sqrt(N-1),
+                      mean_K = mean(K,na.rm=TRUE),
+                      median_K = median(K,na.rm=TRUE),
+                      max_K = 
max(K,na.rm=TRUE),
+                      min_K = min(K,na.rm=TRUE),
+                      sd_K = sd(K,na.rm=TRUE),
+                      se_K = sd_K / sqrt(N-1),
+                      mean_r = mean(r,na.rm=TRUE),
+                      median_r = median(r,na.rm=TRUE),
+                      max_r = max(r,na.rm=TRUE),
+                      min_r = min(r,na.rm=TRUE),
+                      sd_r = sd(r,na.rm=TRUE),
+                      se_r = sd_r / sqrt(N-1),
+                      mean_AUC = mean(AUC,na.rm=TRUE),
+                      median_AUC = median(AUC,na.rm=TRUE),
+                      max_AUC = max(AUC,na.rm=TRUE),
+                      #was labelled min_L (copy/paste slip), which duplicated the L column name and left no min_AUC column
+                      min_AUC = min(AUC,na.rm=TRUE),
+                      sd_AUC = sd(AUC,na.rm=TRUE),
+                      se_AUC = sd_AUC / sqrt(N-1)
+  )
+
+  #NOTE(review): paste() without sep="" puts a space between outputpath and the file name - confirm this is intended
+  write.csv(X_stats_BY,file=paste(outputpath,"SummaryStats_BackgroundStrains.csv"),row.names=FALSE)
+
+  #calculate the max theoretical L values
+  #only look for max values when K is within 2SD of the ref strain
+  #The first branch (factor 0) creates X_within_2SD_K / X_outside_2SD_K; later factors are appended with rbind
+  for(q in unique(X$Conc_Num_Factor)){
+    if(q == 0){
+      X_within_2SD_K <- X[X$Conc_Num_Factor == q,]
+      X_within_2SD_K <- X_within_2SD_K[!is.na(X_within_2SD_K$l),]
+      X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,]
+      X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) ,]
+      X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,]
+
+      X_outside_2SD_K <- X[X$Conc_Num_Factor == q,]
+      X_outside_2SD_K <- X_outside_2SD_K[!is.na(X_outside_2SD_K$l),]
+      #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,]
+      X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,]
+      #X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,]
+
+    }
+    if(q > 0){
+      X_within_2SD_K_temp <- X[X$Conc_Num_Factor == q,]
+      X_within_2SD_K_temp <- X_within_2SD_K_temp[!is.na(X_within_2SD_K_temp$l),]
+      X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,]
+      X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])),]
+
X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])),]
+      X_within_2SD_K <- rbind(X_within_2SD_K,X_within_2SD_K_temp)
+
+      #spots at this concentration whose K is at or beyond 2 SD from the background mean K
+      X_outside_2SD_K_temp <- X[X$Conc_Num_Factor == q,]
+      X_outside_2SD_K_temp <- X_outside_2SD_K_temp[!is.na(X_outside_2SD_K_temp$l),]
+      #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,]
+      X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,]
+      #X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,]
+      X_outside_2SD_K <- rbind(X_outside_2SD_K,X_outside_2SD_K_temp)
+    }
+  }
+
+  #l statistics per concentration for spots whose K lies within 2 SD of the background mean K;
+  #the max column is used further down as the max theoretical L. z_max is (max-mean)/sd.
+  X_stats_BY_L_within_2SD_K <- ddply(X_within_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise,
+                                     N = (length(l)),
+                                     mean = mean(l),
+                                     median = median(l),
+                                     max = max(l,na.rm=TRUE),
+                                     min = min(l,na.rm=TRUE),
+                                     sd = sd(l),
+                                     se = sd / sqrt(N-1),
+                                     z_max = (max-mean)/sd
+  )
+  print(X_stats_BY_L_within_2SD_K)
+  X1_SD_within_2SD_K <- max(X_stats_BY_L_within_2SD_K$sd)
+  write.csv(X_stats_BY_L_within_2SD_K,file=paste(outputpath_QC,"Max_Observed_L_Vals_for_spots_within_2SD_K.csv",sep=""),row.names=FALSE)
+
+  #the same l statistics for the spots outside 2 SD of K
+  X_stats_BY_L_outside_2SD_K <- ddply(X_outside_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise,
+                                      N = (length(l)),
+                                      mean = mean(l),
+                                      median = median(l),
+                                      max = max(l,na.rm=TRUE),
+                                      min = min(l,na.rm=TRUE),
+                                      sd = sd(l),
+                                      se = sd / sqrt(N-1)
+  )
+  print(X_stats_BY_L_outside_2SD_K)
+  X1_SD_outside_2SD_K <- max(X_stats_BY_L_outside_2SD_K$sd)
+
+  #X1_SD_outside_2SD_K <- X[X$l %in% X1_SD_within_2SD_K$l,]
+  #raw L vs K for the spots outside 2 SD of K
+  Outside_2SD_K_L_vs_K <- ggplot(X_outside_2SD_K,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) +
+    ggtitle("Raw L vs K for strains falling outside 2SD of the K mean at each conc") + 
theme_Publication_legend_right()
+  pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8)
+  print(Outside_2SD_K_L_vs_K)
+  dev.off()
+  pgg <- ggplotly(Outside_2SD_K_L_vs_K)
+  plotly_path <- paste(getwd(),"/",outputpath_QC,"RawL_vs_K_for_strains_outside_2SD_K.html",sep="")
+  saveWidget(pgg, file=plotly_path, selfcontained =TRUE)
+
+
+  #delta background vs K for the spots outside 2 SD of K
+  Outside_2SD_K_delta_background_vs_K <- ggplot(X_outside_2SD_K,aes(Delta_Backgrd,K,color=as.factor(Conc_Num))) + geom_point(aes(l=l,ORF=ORF,Gene=Gene),shape=3,position="jitter") +
+    ggtitle("DeltaBackground vs K for strains falling outside 2SD of the K mean at each conc") + theme_Publication_legend_right()
+  pdf(paste(outputpath_QC,"DeltaBackground_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8)
+  #explicit print(): inside the for loop a bare plot object is not auto-printed, which left this PDF without a page
+  print(Outside_2SD_K_delta_background_vs_K)
+  dev.off()
+  pgg <- ggplotly(Outside_2SD_K_delta_background_vs_K)
+  #pgg
+  plotly_path <- paste(getwd(),"/",outputpath_QC,"DeltaBackground_vs_K_for_strains_outside_2SD_K.html",sep="")
+  saveWidget(pgg, file=plotly_path, selfcontained =TRUE)
+
+
+  #get the background strain mean values at the no drug conc to calculate shift
+  #(first row of each background stats table)
+  Background_L <- X_stats_BY_L$mean[1]
+  Background_K <- X_stats_BY_K$mean[1]
+  Background_r <- X_stats_BY_r$mean[1]
+  Background_AUC <- X_stats_BY_AUC$mean[1]
+
+  #create empty plots for plotting element
+  p_l <- ggplot()
+  p_K <- ggplot()
+  p_r <- ggplot()
+  p_AUC <- ggplot()
+
+  p_rf_l <- ggplot()
+  p_rf_K <- ggplot()
+  p_rf_r <- ggplot()
+  p_rf_AUC <- ggplot()
+
+  #get only the deletion strains
+  X2 <- X
+  X2 <- X2[X2$OrfRep != "YDL227C",]
+
+  #if set to max theoretical value, add a 1 to SM, if not, leave as 0
+  #SM = Set to Max
+  X2$SM <- 0
+  #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min
+  for(i in 1:length(unique(X2$Conc_Num))){
+    Concentration <- unique(X2$Conc_Num)[i]
+    X2_temp <- X2[X2$Conc_Num == Concentration,]
+    if(Concentration == 
0){ + X2_new <- X2_temp + print(paste("Check loop order, conc =",Concentration,sep=" ")) + } + if(Concentration > 0){ + try(X2_temp[X2_temp$l == 0 & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) + try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$SM <- 1) + try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) + + #X2_temp[X2_temp$K == 0,]$K <- X_stats_ALL_K$max[i] + #X2_temp[X2_temp$r == 0,]$r <- X_stats_ALL_r$max[i] + #X2_temp[X2_temp$AUC == 0,]$AUC <- X_stats_ALL_AUC$max[i] + print(paste("Check loop order, conc =",Concentration,sep=" ")) + + X2_new <- rbind(X2_new,X2_temp) + + } + } + X2 <- X2_new + + + #get only the RF strains + X2_RF <- X + X2_RF <- X2_RF[X2_RF$OrfRep == "YDL227C",] + #if set to max theoretical value, add a 1 to SM, if not, leave as 0 + #SM = Set to Max + X2_RF$SM <- 0 + #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min + for(i in 1:length(unique(X2_RF$Conc_Num))){ + Concentration <- unique(X2_RF$Conc_Num)[i] + X2_RF_temp <- X2_RF[X2_RF$Conc_Num == Concentration,] + if(Concentration == 0){ + X2_RF_new <- X2_RF_temp + print(paste("Check loop order, conc =",Concentration,sep=" ")) + } + if(Concentration > 0){ + try(X2_RF_temp[X2_RF_temp$l == 0 & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) + try(X2_RF_temp[X2_RF_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_RF_temp$l),]$SM <- 1) #flag the RF rows themselves, not the stale deletion-strain X2_temp + try(X2_RF_temp[X2_RF_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) + print(paste("Check loop order, if error, refs have no L values outside theoretical max L, for REFs, conc =",Concentration,sep=" ")) + + X2_RF_new <- rbind(X2_RF_new,X2_RF_temp) + + } + } + X2_RF <- X2_RF_new + + + #######Part 4 Get the RF Z score values + #Change the OrfRep Column to include the RF strain, the Gene name and the Num. 
so each RF gets its own score + X2_RF$OrfRep <- paste(X2_RF$OrfRep,X2_RF$Gene,X2_RF$Num.,sep="_") + + num_genes_RF <- length(unique(X2_RF$OrfRep)) + print(num_genes_RF) + + + #create the output data.frame containing columns for each RF strain + InteractionScores_RF <- unique(X2_RF["OrfRep"]) + #InteractionScores_RF$Gene <- unique(X2$Gene) + InteractionScores_RF$Gene <- NA + InteractionScores_RF$Raw_Shift_L <- NA + InteractionScores_RF$Z_Shift_L <- NA + InteractionScores_RF$lm_Score_L <- NA + InteractionScores_RF$Z_lm_L <- NA + InteractionScores_RF$R_Squared_L <- NA + InteractionScores_RF$Sum_Z_Score_L <- NA + InteractionScores_RF$Avg_Zscore_L <- NA + InteractionScores_RF$Raw_Shift_K <- NA + InteractionScores_RF$Z_Shift_K <- NA + InteractionScores_RF$lm_Score_K <- NA + InteractionScores_RF$Z_lm_K <- NA + InteractionScores_RF$R_Squared_K <- NA + InteractionScores_RF$Sum_Z_Score_K <- NA + InteractionScores_RF$Avg_Zscore_K <- NA + InteractionScores_RF$Raw_Shift_r <- NA + InteractionScores_RF$Z_Shift_r <- NA + InteractionScores_RF$lm_Score_r <- NA + InteractionScores_RF$Z_lm_r <- NA + InteractionScores_RF$R_Squared_r <- NA + InteractionScores_RF$Sum_Z_Score_r <- NA + InteractionScores_RF$Avg_Zscore_r <- NA + InteractionScores_RF$Raw_Shift_AUC <- NA + InteractionScores_RF$Z_Shift_AUC <- NA + InteractionScores_RF$lm_Score_AUC <- NA + InteractionScores_RF$Z_lm_AUC <- NA + InteractionScores_RF$R_Squared_AUC <- NA + InteractionScores_RF$Sum_Z_Score_AUC <- NA + InteractionScores_RF$Avg_Zscore_AUC <- NA + InteractionScores_RF$NG <- NA + InteractionScores_RF$SM <- NA + + + for(i in 1:num_genes_RF){ + #get each deletion strain ORF + Gene_Sel <- unique(X2_RF$OrfRep)[i] + #extract only the current deletion strain and its data + X_Gene_Sel <- X2_RF[X2_RF$OrfRep == Gene_Sel,] + + X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean_L = mean(l,na.rm = TRUE), + median_L = median(l,na.rm = TRUE), + sd_L = 
sd(l,na.rm = TRUE), + se_L = sd_L / sqrt(N-1), + mean_K = mean(K,na.rm = TRUE), + median_K = median(K,na.rm = TRUE), + sd_K = sd(K,na.rm = TRUE), + se_K = sd_K / sqrt(N-1), + mean_r = mean(r,na.rm = TRUE), + median_r = median(r,na.rm = TRUE), + sd_r = sd(r,na.rm = TRUE), + se_r = sd_r / sqrt(N-1), + mean_AUC = mean(AUC,na.rm = TRUE), + median_AUC = median(AUC,na.rm = TRUE), + sd_AUC = sd(AUC,na.rm = TRUE), + se_AUC = sd_AUC / sqrt(N-1), + NG = sum(NG,na.rm=TRUE), + DB= sum(DB,na.rm=TRUE), + SM = sum(SM,na.rm=TRUE) + ) + + + #Get shift vals + #if L is 0, that means the no growth on no drug + #if L is NA at 0, that means the spot was removed due to contamination + #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift + #otherwise calculate shift at no drug conc + if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){ + X_stats_interaction$Raw_Shift_L <- 0 + X_stats_interaction$Raw_Shift_K <- 0 + X_stats_interaction$Raw_Shift_r <- 0 + X_stats_interaction$Raw_Shift_AUC <- 0 + X_stats_interaction$Z_Shift_L <- 0 + X_stats_interaction$Z_Shift_K <- 0 + X_stats_interaction$Z_Shift_r <- 0 + X_stats_interaction$Z_Shift_AUC <- 0 + }else{ + X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L + X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K + X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r + X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC + X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1] + X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1] + X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1] + X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1] + } + + + #get WT vals + X_stats_interaction$WT_l <- X_stats_BY_L$mean + X_stats_interaction$WT_K 
<- X_stats_BY_K$mean + X_stats_interaction$WT_r <- X_stats_BY_r$mean + X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean + + #Get WT SD + X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd + X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd + X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd + X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd + + + #only get scores if there's growth at no drug + if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){ + + #calculate expected values + X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L + X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K + X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r + X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC + + #calculate normalized delta values + X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L + X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K + X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r + X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC + + #disregard shift for no growth values in Z score calculation + if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- 
X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC + + } + #disregard shift for set to max values in Z score calculation + if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l + #only calculate the L value without shift since L is the only adjusted value + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC + + } + + + + #calculate Z score at each concentration + X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) + X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) + X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) + X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) + + #get linear model + gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction) + gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction) + gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction) + gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction) + + #get interaction score calculated by linear model and R-squared value for the fit + gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] 
+ r_squared_l <- summary(gene_lm_L)$r.squared + gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] + r_squared_K <- summary(gene_lm_K)$r.squared + gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] + r_squared_r <- summary(gene_lm_r)$r.squared + gene_interaction_AUC <- MAX_CONC*(gene_lm_AUC$coefficients[2]) + gene_lm_AUC$coefficients[1] #slope and intercept must both come from the AUC fit + r_squared_AUC <- summary(gene_lm_AUC)$r.squared + + #Get total of non removed values + Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1 + + #report the scores + InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] + InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] + + InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] + InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] + InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L + InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l + InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE) + InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc) + + + InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] + InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] + InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K + InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K + InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- 
sum(X_stats_interaction$Zscore_K,na.rm = TRUE) + InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc) + + InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] + InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] + InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r + InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r + InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE) + InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1) + + InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] + InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] + InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC + InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC + InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE) + InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1) + } + + if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){ + + #calculate expected values + X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L + X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K + X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r + 
X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC + + #calculate normalized delta values + X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L + X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K + X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r + X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC + + #disregard shift for missing values in Z score calculatiom + if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){ + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC + } + #disregard shift for set to max values in Z score calculation + if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l + #only calculate the L value without shift since L is the only adjusted value + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - 
X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC + + } + + + #calculate Z score at each concentration + X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) + X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) + X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) + X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) + + #NA values for the next part since there's an NA or 0 at the no drug. + gene_lm_L <- NA + gene_lm_K <- NA + gene_lm_r <- NA + gene_lm_AUC <- NA + + gene_interaction_L <- NA + r_squared_l <- NA + gene_interaction_K <- NA + r_squared_K <- NA + gene_interaction_r <- NA + r_squared_r <- NA + gene_interaction_AUC <- NA + r_squared_AUC <- NA + + X_stats_interaction$Raw_Shift_L <- NA + X_stats_interaction$Raw_Shift_K <- NA + X_stats_interaction$Raw_Shift_r <- NA + X_stats_interaction$Raw_Shift_AUC <- NA + + X_stats_interaction$Z_Shift_L <- NA + X_stats_interaction$Z_Shift_K <- NA + X_stats_interaction$Z_Shift_r <- NA + X_stats_interaction$Z_Shift_AUC <- NA + + InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] + InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] + + InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] + InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] + InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L + InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l + 
InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + + InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] + InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] + InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K + InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K + InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + + InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] + InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] + InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r + InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r + InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + + InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] + InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] + InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC + InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC + InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + } + + if(i == 1){ + X_stats_interaction_ALL_RF <- X_stats_interaction 
+ } + if(i > 1){ + X_stats_interaction_ALL_RF <- rbind(X_stats_interaction_ALL_RF,X_stats_interaction) + } + + InteractionScores_RF$NG[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE) + InteractionScores_RF$DB[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE) + InteractionScores_RF$SM[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE) + + #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int) + + + } + print("Pass RF Calculation loop") + + lm_sd_L <- sd(InteractionScores_RF$lm_Score_L,na.rm=TRUE) + lm_sd_K <- sd(InteractionScores_RF$lm_Score_K,na.rm=TRUE) + lm_sd_r <- sd(InteractionScores_RF$lm_Score_r,na.rm=TRUE) + lm_sd_AUC <- sd(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) + + lm_mean_L <- mean(InteractionScores_RF$lm_Score_L,na.rm=TRUE) + lm_mean_K <- mean(InteractionScores_RF$lm_Score_K,na.rm=TRUE) + lm_mean_r <- mean(InteractionScores_RF$lm_Score_r,na.rm=TRUE) + lm_mean_AUC <- mean(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) + + print(paste("Mean RF linear regression score L",lm_mean_L)) + + + InteractionScores_RF$Z_lm_L <- (InteractionScores_RF$lm_Score_L - lm_mean_L)/(lm_sd_L) + InteractionScores_RF$Z_lm_K <- (InteractionScores_RF$lm_Score_K - lm_mean_K)/(lm_sd_K) + InteractionScores_RF$Z_lm_r <- (InteractionScores_RF$lm_Score_r - lm_mean_r)/(lm_sd_r) + InteractionScores_RF$Z_lm_AUC <- (InteractionScores_RF$lm_Score_AUC - lm_mean_AUC)/(lm_sd_AUC) + + + InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$Z_lm_L,decreasing=TRUE),] + InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$NG,decreasing=TRUE),] + write.csv(InteractionScores_RF,paste(outputpath,"RF_ZScores_Interaction.csv",sep=""),row.names=FALSE) + + + for(i in 1:num_genes_RF){ + Gene_Sel <- unique(InteractionScores_RF$OrfRep)[i] + X_ZCalculations <- X_stats_interaction_ALL_RF[X_stats_interaction_ALL_RF$OrfRep == Gene_Sel,] + X_Int_Scores <- 
InteractionScores_RF[InteractionScores_RF$OrfRep == Gene_Sel,] + + p_rf_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-65,65)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=35,label = paste("Avg Zscore =",round(X_Int_Scores$Avg_Zscore_L,2))) + + annotate("text",x=1,y=25,label = paste("lm Zscore =",round(X_Int_Scores$Z_lm_L,2))) + + annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + + theme_Publication() + + p_rf_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-65,65)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) + + annotate("text",x=1,y=25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_K,2))) + #K panel reports the K lm Z-score, not the L one + annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-45,label = paste("SM 
=",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + + theme_Publication() + + p_rf_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-0.65,0.65)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) + + annotate("text",x=1,y=0.25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_r,2))) + + annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) + + theme_Publication() + + p_rf_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-6500,6500)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) + + annotate("text",x=1,y=2500,label = paste("lm 
ZScore =",round(X_Int_Scores$Z_lm_AUC,2))) + + annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) + + theme_Publication() + + + + if(i == 1){ + X_stats_interaction_ALL_RF_final <- X_ZCalculations + } + if(i > 1){ + X_stats_interaction_ALL_RF_final <- rbind(X_stats_interaction_ALL_RF_final,X_ZCalculations) + } + } + print("Pass RF ggplot loop") + write.csv(X_stats_interaction_ALL_RF_final,paste(outputpath,"RF_ZScore_Calculations.csv",sep=""),row.names = FALSE) + + + ####### Part 5 - Get Zscores for Gene deletion strains + + + + #get total number of genes for the next loop + num_genes <- length(unique(X2$OrfRep)) + print(num_genes) + + #create the output data.frame containing columns for each deletion strain + InteractionScores <- unique(X2["OrfRep"]) + #InteractionScores$Gene <- unique(X2$Gene) + InteractionScores$Gene <- NA + InteractionScores$Raw_Shift_L <- NA + InteractionScores$Z_Shift_L <- NA + InteractionScores$lm_Score_L <- NA + InteractionScores$Z_lm_L <- NA + InteractionScores$R_Squared_L <- NA + InteractionScores$Sum_Z_Score_L <- NA + InteractionScores$Avg_Zscore_L <- NA + InteractionScores$Raw_Shift_K <- NA + InteractionScores$Z_Shift_K <- NA + InteractionScores$lm_Score_K <- NA + InteractionScores$Z_lm_K <- NA + InteractionScores$R_Squared_K <- NA + InteractionScores$Sum_Z_Score_K <- NA + InteractionScores$Avg_Zscore_K <- NA + InteractionScores$Raw_Shift_r <- NA + InteractionScores$Z_Shift_r <- NA + InteractionScores$lm_Score_r <- NA + InteractionScores$Z_lm_r <- NA + InteractionScores$R_Squared_r <- NA + InteractionScores$Sum_Z_Score_r <- NA + 
InteractionScores$Avg_Zscore_r <- NA + InteractionScores$Raw_Shift_AUC <- NA + InteractionScores$Z_Shift_AUC <- NA + InteractionScores$lm_Score_AUC <- NA + InteractionScores$Z_lm_AUC <- NA + InteractionScores$R_Squared_AUC <- NA + InteractionScores$Sum_Z_Score_AUC <- NA + InteractionScores$Avg_Zscore_AUC <- NA + InteractionScores$NG <- NA + InteractionScores$DB <- NA + InteractionScores$SM <- NA + + for(i in 1:num_genes){ + #get each deletion strain ORF + Gene_Sel <- unique(X2$OrfRep)[i] + #extract only the current deletion strain and its data + X_Gene_Sel <- X2[X2$OrfRep == Gene_Sel,] + + X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean_L = mean(l,na.rm = TRUE), + median_L = median(l,na.rm = TRUE), + sd_L = sd(l,na.rm = TRUE), + se_L = sd_L / sqrt(N-1), + mean_K = mean(K,na.rm = TRUE), + median_K = median(K,na.rm = TRUE), + sd_K = sd(K,na.rm = TRUE), + se_K = sd_K / sqrt(N-1), + mean_r = mean(r,na.rm = TRUE), + median_r = median(r,na.rm = TRUE), + sd_r = sd(r,na.rm = TRUE), + se_r = sd_r / sqrt(N-1), + mean_AUC = mean(AUC,na.rm = TRUE), + median_AUC = median(AUC,na.rm = TRUE), + sd_AUC = sd(AUC,na.rm = TRUE), + se_AUC = sd_AUC / sqrt(N-1), + NG = sum(NG,na.rm=TRUE), + DB= sum(DB,na.rm=TRUE), + SM = sum(SM,na.rm=TRUE) + ) + + + #Get shift vals + #if L is 0, that means the no growth on no drug + #if L is NA at 0, that means the spot was removed due to contamination + #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift + #otherwise calculate shift at no drug conc + if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){ + X_stats_interaction$Raw_Shift_L <- 0 + X_stats_interaction$Raw_Shift_K <- 0 + X_stats_interaction$Raw_Shift_r <- 0 + X_stats_interaction$Raw_Shift_AUC <- 0 + X_stats_interaction$Z_Shift_L <- 0 + X_stats_interaction$Z_Shift_K <- 0 + X_stats_interaction$Z_Shift_r <- 0 + X_stats_interaction$Z_Shift_AUC <- 0 + 
}else{ + X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L + X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K + X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r + X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC + X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1] + X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1] + X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1] + X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1] + } + + + #get WT vals + X_stats_interaction$WT_l <- X_stats_BY_L$mean + X_stats_interaction$WT_K <- X_stats_BY_K$mean + X_stats_interaction$WT_r <- X_stats_BY_r$mean + X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean + + #Get WT SD + X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd + X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd + X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd + X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd + + + #only get scores if there's growth at no drug + if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){ + + #calculate expected values + X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L + X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K + X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r + X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC + + #calculate normalized delta values + X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L + X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K + X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r + 
X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC + + #disregard shift for no growth values in Z score calculation + if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC + + } + #disregard shift for set to max values in Z score calculation + if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l + #only calculate the L value without shift since L is the only adjusted value + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC + + } + + + + #calculate Z score at each concentration + X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) + 
X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K)
+     X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r)
+     X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC)
+
+     #get linear model
+     # One fit per growth parameter: Delta regressed on Conc_Num_Factor.
+     gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction)
+     gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction)
+     gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction)
+     gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction)
+
+     #get interaction score calculated by linear model and R-squared value for the fit
+     # Score = intercept + slope * MAX_CONC, i.e. the fitted Delta at MAX_CONC.
+     # coefficients[1] is the intercept and coefficients[2] the Conc_Num_Factor slope.
+     gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1]
+     r_squared_l <- summary(gene_lm_L)$r.squared
+     gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1]
+     r_squared_K <- summary(gene_lm_K)$r.squared
+     gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1]
+     r_squared_r <- summary(gene_lm_r)$r.squared
+     # Fixed: the AUC score previously combined the slope of the r model with the
+     # intercept of the AUC model; both terms must come from gene_lm_AUC.
+     gene_interaction_AUC <- MAX_CONC*(gene_lm_AUC$coefficients[2]) + gene_lm_AUC$coefficients[1]
+     r_squared_AUC <- summary(gene_lm_AUC)$r.squared
+
+     #Get total of non removed values
+     # Total concentrations minus the DB-flagged rows minus one
+     # (presumably the no-drug concentration; confirm against where Total_Conc_Nums is set).
+     Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1
+
+     #report the scores
+     # Every write below targets the InteractionScores row(s) whose OrfRep equals Gene_Sel.
+     InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1])
+     InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1])
+
+     InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1]
+     InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1]
+     InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_L
+     InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <-
(gene_interaction_L-lm_mean_L)/lm_sd_L
+     # (the line above is the right-hand side of the Z_lm_L assignment: the lm score
+     #  standardized with lm_mean_L / lm_sd_L, which are defined outside this view)
+     InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l
+     InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)
+     InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc)
+
+
+     # Same seven fields for K.
+     InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1]
+     InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1]
+     InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_K
+     InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_K-lm_mean_K)/lm_sd_K
+     InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K
+     InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)
+     InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc)
+
+     # Same seven fields for r.
+     # NOTE(review): Avg_Zscore_r (and Avg_Zscore_AUC below) divide by
+     # Total_Conc_Nums-1, whereas L and K divide by Num_non_Removed_Conc, which
+     # also subtracts the DB-flagged rows. Confirm whether this difference is intended.
+     InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1]
+     InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1]
+     InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_r
+     InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_r-lm_mean_r)/lm_sd_r
+     InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r
+     InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)
+     InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1)
+
+     # Same seven fields for AUC.
+     InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1]
+     InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1]
+     InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_AUC
+     InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_AUC-lm_mean_AUC)/lm_sd_AUC
+     InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC
+     InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)
+     InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1)
+   }
+
+   # Complementary branch: the first row's mean_L is 0 or NA. The per-row
+   # Exp / Delta / Z-score columns are still filled in, but no linear model is
+   # fitted and the per-gene summary scores are reported as NA further down.
+   if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){
+
+     #calculate expected values
+     X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L
+     X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K
+     X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r
+     X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC
+
+     #calculate normalized delta values
+     X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L
+     X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K
+     X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r
+     X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC
+
+     #disregard shift for missing values in Z score calculation
+     # Rows with NG == 1: Delta = mean - WT for all four parameters.
+     if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){
+       X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l
+       X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K
+       X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r
+       X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC
+     }
+     #disregard shift for set to max values in Z score calculation
+     # Rows with SM == 1: only Delta_L is recomputed.
+     if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){
+       X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l
+       #only calculate the L value without shift since L is the only adjusted value
+       #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K
+       #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r
+       #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC
+
+     }
+
+
+     #calculate Z score at each concentration
+     X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l)
+     X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K)
+     X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r)
+     X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC)
+
+     #NA values for the next part since there's an NA or 0 at the no drug.
+     gene_lm_L <- NA
+     gene_lm_K <- NA
+     gene_lm_r <- NA
+     # Fixed: the AUC fit, score and R-squared were not reset in this branch, so
+     # R_Squared_AUC (assigned further down from r_squared_AUC) picked up the value
+     # left over from an earlier gene, or failed if no earlier gene had been fitted.
+     gene_lm_AUC <- NA
+
+     gene_interaction_L <- NA
+     r_squared_l <- NA
+     gene_interaction_K <- NA
+     r_squared_K <- NA
+     gene_interaction_r <- NA
+     r_squared_r <- NA
+     gene_interaction_AUC <- NA
+     r_squared_AUC <- NA
+
+     # The shifts are blanked only now, after Exp/Delta above have used them.
+     X_stats_interaction$Raw_Shift_L <- NA
+     X_stats_interaction$Raw_Shift_K <- NA
+     X_stats_interaction$Raw_Shift_r <- NA
+     X_stats_interaction$Raw_Shift_AUC <- NA
+
+     X_stats_interaction$Z_Shift_L <- NA
+     X_stats_interaction$Z_Shift_K <- NA
+     X_stats_interaction$Z_Shift_r <- NA
+     X_stats_interaction$Z_Shift_AUC <- NA
+
+     # Report the gene with NA for every score field; only the identifiers carry values.
+     InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1])
+     InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1])
+
+     InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1]
+     InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1]
+     InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA
+     InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- NA
+     InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l
+     InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA
+     InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- NA
+
+     InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1]
+     InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1]
+     InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA
+     InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <-NA
+     InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K
+     InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA
+     InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- NA
+
+     InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <-
X_stats_interaction$Raw_Shift_r[1]
+     # (the line above is the right-hand side of the Raw_Shift_r assignment)
+     InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1]
+     InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA
+     InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- NA
+     InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r
+     InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA
+     InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- NA
+
+     InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1]
+     InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1]
+     InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA
+     InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA
+     InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC
+     InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA
+     InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA
+   }
+
+   # Accumulate the per-concentration rows of every gene into one data frame:
+   # the first iteration starts it, later iterations append with rbind.
+   if(i == 1){
+     X_stats_interaction_ALL <- X_stats_interaction
+   }
+   if(i > 1){
+     X_stats_interaction_ALL <- rbind(X_stats_interaction_ALL,X_stats_interaction)
+   }
+
+   # Per-gene totals of the NG, DB and SM flag columns.
+   InteractionScores$NG[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE)
+   InteractionScores$DB[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE)
+   InteractionScores$SM[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE)
+
+   #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int)
+
+
+ }
+ # (the brace above closes the enclosing per-gene block, whose header is outside this view)
+ print("Pass Int Calculation loop")
+ # Two successive sorts: by Z_lm_L descending, then by NG descending. The second
+ # sort decides the final primary order; rows with equal NG are expected to keep
+ # their Z_lm_L order (relies on order() leaving ties in place; verify).
+ InteractionScores <- InteractionScores[order(InteractionScores$Z_lm_L,decreasing=TRUE),]
+ InteractionScores <- InteractionScores[order(InteractionScores$NG,decreasing=TRUE),]
+ # OrfRep values in the sorted order.
+ df_order_by_OrfRep <- unique(InteractionScores$OrfRep) +
#X_stats_interaction_ALL <- X_stats_interaction_ALL[order(X_stats_interaction_ALL$NG,decreasing=TRUE),]
+ # Write the complete, sorted score table first; the thresholded subsets follow.
+ write.csv(InteractionScores, paste0(outputpath,"ZScores_Interaction.csv"), row.names=FALSE)
+
+ # Hit tables based on the average Z-score, cut at +/-2. As coded, an enhancer
+ # is Avg_Zscore_L >= 2 or Avg_Zscore_K <= -2 and a suppressor is the opposite
+ # sign. subset() treats an NA condition as "no match", and the !is.na(OrfRep)
+ # term keeps only rows that carry an OrfRep.
+ InteractionScores_deletion_enhancers_L <- subset(InteractionScores, Avg_Zscore_L >= 2 & !is.na(OrfRep))
+ InteractionScores_deletion_enhancers_K <- subset(InteractionScores, Avg_Zscore_K <= -2 & !is.na(OrfRep))
+ InteractionScores_deletion_suppressors_L <- subset(InteractionScores, Avg_Zscore_L <= -2 & !is.na(OrfRep))
+ InteractionScores_deletion_suppressors_K <- subset(InteractionScores, Avg_Zscore_K >= 2 & !is.na(OrfRep))
+ InteractionScores_deletion_enhancers_and_Suppressors_L <- subset(InteractionScores, (Avg_Zscore_L >= 2 | Avg_Zscore_L <= -2) & !is.na(OrfRep))
+ InteractionScores_deletion_enhancers_and_Suppressors_K <- subset(InteractionScores, (Avg_Zscore_K >= 2 | Avg_Zscore_K <= -2) & !is.na(OrfRep))
+
+ # Genes where the lm-based Z-score and the average Z-score point in opposite directions.
+ InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- subset(InteractionScores, (Z_lm_L >= 2 & Avg_Zscore_L <= -2) & !is.na(OrfRep))
+ InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- subset(InteractionScores, (Z_lm_L <= -2 & Avg_Zscore_L >= 2) & !is.na(OrfRep))
+ InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- subset(InteractionScores, (Z_lm_K <= -2 & Avg_Zscore_K >= 2) & !is.na(OrfRep))
+ InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- subset(InteractionScores, (Z_lm_K >= 2 & Avg_Zscore_K <= -2) & !is.na(OrfRep))
+
+ # First four of the subset files.
+ write.csv(InteractionScores_deletion_enhancers_L, paste0(outputpath,"ZScores_Interaction_DeletionEnhancers_L.csv"), row.names=FALSE)
+ write.csv(InteractionScores_deletion_enhancers_K, paste0(outputpath,"ZScores_Interaction_DeletionEnhancers_K.csv"), row.names=FALSE)
+ write.csv(InteractionScores_deletion_suppressors_L, paste0(outputpath,"ZScores_Interaction_DeletionSuppressors_L.csv"), row.names=FALSE)
+ write.csv(InteractionScores_deletion_suppressors_K, paste0(outputpath,"ZScores_Interaction_DeletionSuppressors_K.csv"), row.names=FALSE) +
write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L, paste0(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv"), row.names=FALSE)
+ write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K, paste0(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv"), row.names=FALSE)
+ write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L, paste0(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv"), row.names=FALSE)
+ write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L, paste0(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv"), row.names=FALSE)
+ write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K, paste0(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv"), row.names=FALSE)
+ write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K, paste0(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv"), row.names=FALSE)
+
+ #get enhancers and suppressors for linear regression
+ # Same +/-2 cut, applied to the lm-based Z-scores (Z_lm_L, Z_lm_K). subset()
+ # treats an NA condition as "no match"; !is.na(OrfRep) keeps only rows that
+ # carry an OrfRep.
+ InteractionScores_deletion_enhancers_L_lm <- subset(InteractionScores, Z_lm_L >= 2 & !is.na(OrfRep))
+ InteractionScores_deletion_enhancers_K_lm <- subset(InteractionScores, Z_lm_K <= -2 & !is.na(OrfRep))
+ InteractionScores_deletion_suppressors_L_lm <- subset(InteractionScores, Z_lm_L <= -2 & !is.na(OrfRep))
+ InteractionScores_deletion_suppressors_K_lm <- subset(InteractionScores, Z_lm_K >= 2 & !is.na(OrfRep))
+ InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- subset(InteractionScores, (Z_lm_L >= 2 | Z_lm_L <= -2) & !is.na(OrfRep))
+ InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- subset(InteractionScores, (Z_lm_K >= 2 | Z_lm_K <= -2) & !is.na(OrfRep))
+
+ write.csv(InteractionScores_deletion_enhancers_L_lm, paste0(outputpath,"ZScores_Interaction_DeletionEnhancers_L_lm.csv"), row.names=FALSE)
+ write.csv(InteractionScores_deletion_enhancers_K_lm, paste0(outputpath,"ZScores_Interaction_DeletionEnhancers_K_lm.csv"), row.names=FALSE)
+ write.csv(InteractionScores_deletion_suppressors_L_lm, paste0(outputpath,"ZScores_Interaction_DeletionSuppressors_L_lm.csv"), row.names=FALSE)
+ write.csv(InteractionScores_deletion_suppressors_K_lm, paste0(outputpath,"ZScores_Interaction_DeletionSuppressors_K_lm.csv"), row.names=FALSE)
+ write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L_lm, paste0(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv"), row.names=FALSE)
+ write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K_lm, paste0(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv"), row.names=FALSE)
+
+
+ # Study labels go to a fixed relative path rather than to outputpath.
+ write.csv(Labels, file = "../Code/StudyInfo.csv", row.names = FALSE)
+ print('ln 1570 write StudyInfo.csv after ')
+ #write.table(Labels,file=paste("../Code/StudyInfo.txt"),sep="\t",row.names = FALSE)
+
+ # One pass per gene, taken in the current row order of InteractionScores.
+ for(i in 1:num_genes){
+   Gene_Sel <-
unique(InteractionScores$OrfRep)[i]
+   # (the line above is the right-hand side of the Gene_Sel assignment: the i-th
+   #  distinct OrfRep in the current row order of InteractionScores)
+   # Per-concentration rows and the one-row score summary for this gene.
+   X_ZCalculations <- X_stats_interaction_ALL[X_stats_interaction_ALL$OrfRep == Gene_Sel,]
+   X_Int_Scores <- InteractionScores[InteractionScores$OrfRep == Gene_Sel,]
+
+   # Four plots per gene (L, K, r, AUC), all built the same way: Delta against
+   # Conc_Num_Factor with a straight-line fit, an error bar spanning
+   # 0 +/- 2*WT_sd at each concentration, and text labels for the Z shift, the
+   # lm Z-score and the NG / DB / SM counts. Y limits, label positions and
+   # breaks are hard-coded per parameter.
+   # NOTE(review): scale_color_discrete(guide = FALSE) is kept as written; newer
+   # ggplot2 releases prefer guide = "none" (confirm against the installed version).
+   p_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) +
+     coord_cartesian(ylim = c(-65,65)) +
+     geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) +
+     ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) +
+     annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) +
+     #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_L,2))) +
+     annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_L,2))) +
+     annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) +
+     annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) +
+     annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) +
+     scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) +
+     scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) +
+     theme_Publication()
+
+   p_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) +
+     coord_cartesian(ylim = c(-65,65)) +
+     geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) +
+     ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) +
+     annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) +
+     #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) +
+     annotate("text",x=1,y=25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_K,2))) +
+     annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) +
+     annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) +
+     annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) +
+     scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) +
+     scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) +
+     theme_Publication()
+
+   p_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) +
+     coord_cartesian(ylim = c(-0.65,0.65)) +
+     geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) +
+     ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) +
+     annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) +
+     #annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) +
+     annotate("text",x=1,y=0.25,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_r,2))) +
+     annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) +
+     annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) +
+     annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) +
+     scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) +
+     scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) +
+     theme_Publication()
+
+   p_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) +
+     coord_cartesian(ylim = c(-6500,6500)) +
+     geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) +
+     ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) +
+     annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) +
+     #annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) +
+     annotate("text",x=1,y=2500,label = paste("Z lm Score =",round(X_Int_Scores$Z_lm_AUC,2))) +
+     annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) +
+     annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) +
+     annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) +
+     scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) +
+     scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) +
+     theme_Publication()
+
+
+
+   # Rebuild the per-concentration table gene by gene, so its row order follows
+   # the order in which genes are taken from InteractionScores.
+   if(i == 1){
+     X_stats_interaction_ALL_final <- X_ZCalculations
+   }
+   if(i > 1){
+     X_stats_interaction_ALL_final <- rbind(X_stats_interaction_ALL_final,X_ZCalculations)
+   }
+ }
+ print("Pass Int ggplot loop")
+ write.csv(X_stats_interaction_ALL_final,paste(outputpath,"ZScore_Calculations.csv",sep=""),row.names = FALSE)
+
+
+
+
+
+
+ # Empty placeholder plot used to pad partially filled pages.
+ Blank <- ggplot(X2_RF) + geom_blank()
+
+ # Everything drawn from here until the matching dev.off() goes into this multi-page PDF.
+ pdf(paste(outputpath,"InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE)
+
+ # Summary of X2_RF per concentration: mean / median / max / min / sd of
+ # l, K, r and AUC plus the summed NG, DB and SM flags.
+ X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise,
+                        mean_L = mean(l,na.rm=TRUE),
+                        median_L = median(l,na.rm=TRUE),
+                        max_L = max(l,na.rm=TRUE),
+                        min_L = min(l,na.rm=TRUE),
+                        sd_L = sd(l,na.rm=TRUE),
+                        mean_K = mean(K,na.rm=TRUE),
+                        median_K = median(K,na.rm=TRUE),
+                        max_K = max(K,na.rm=TRUE),
+                        min_K = min(K,na.rm=TRUE),
+                        sd_K = sd(K,na.rm=TRUE),
+                        mean_r = mean(r,na.rm=TRUE),
+                        median_r = median(r,na.rm=TRUE),
+                        max_r = max(r,na.rm=TRUE),
+                        min_r = min(r,na.rm=TRUE),
+                        sd_r = sd(r,na.rm=TRUE),
+                        mean_AUC = mean(AUC,na.rm=TRUE),
+                        median_AUC = median(AUC,na.rm=TRUE),
+                        max_AUC = max(AUC,na.rm=TRUE),
+                        min_AUC = min(AUC,na.rm=TRUE),
+                        sd_AUC = sd(AUC,na.rm=TRUE),
+                        NG = sum(NG,na.rm=TRUE),
+                        DB = sum(DB,na.rm=TRUE),
+                        SM = sum(SM,na.rm=TRUE)
+ )
+
+
+ L_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,l)) +
geom_point(position="jitter",size=1) +
+   # (continues the L_Stats plot started on the previous line)
+   # The four *_Stats plots share one recipe: jittered X2_RF points, a red
+   # mean +/- sd error bar and a red mean point per concentration, and three
+   # rows of text giving the NG / DB / SM totals from X_stats_X2_RF.
+   # Only the y variable, y limits and label heights differ.
+   # NOTE(review): fun.y / fun.ymin / fun.ymax are the older stat_summary()
+   # argument names; newer ggplot2 releases use fun / fun.min / fun.max
+   # (confirm against the installed version before renaming).
+   stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
+                geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") +
+   scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
+   ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) +
+   annotate("text",x=-0.25,y=10,label="NG") +
+   annotate("text",x=-0.25,y=5,label="DB") +
+   annotate("text",x=-0.25,y=0,label="SM") +
+   annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10,label=X_stats_X2_RF$NG) +
+   annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=5,label=X_stats_X2_RF$DB) +
+   annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=0,label=X_stats_X2_RF$SM) +
+   theme_Publication()
+
+ K_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,K)) + geom_point(position="jitter",size=1) +
+   stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
+                geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") +
+   scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
+   ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(-20,160)) +
+   annotate("text",x=-0.25,y=-5,label="NG") +
+   annotate("text",x=-0.25,y=-12.5,label="DB") +
+   annotate("text",x=-0.25,y=-20,label="SM") +
+   annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-5,label=X_stats_X2_RF$NG) +
+   annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-12.5,label=X_stats_X2_RF$DB) +
+   annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-20,label=X_stats_X2_RF$SM) +
+   theme_Publication()
+
+ R_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,r)) + geom_point(position="jitter",size=1) +
+   stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
+                geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") +
+   scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
+   ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) +
+   annotate("text",x=-0.25,y=.9,label="NG") +
+   annotate("text",x=-0.25,y=.8,label="DB") +
+   annotate("text",x=-0.25,y=.7,label="SM") +
+   annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.9,label=X_stats_X2_RF$NG) +
+   annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.8,label=X_stats_X2_RF$DB) +
+   annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.7,label=X_stats_X2_RF$SM) +
+   theme_Publication()
+
+ AUC_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,AUC)) + geom_point(position="jitter",size=1) +
+   stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
+                geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") +
+   scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
+   ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) +
+   annotate("text",x=-0.25,y=11000,label="NG") +
+   annotate("text",x=-0.25,y=10000,label="DB") +
+   annotate("text",x=-0.25,y=9000,label="SM") +
+   annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=11000,label=X_stats_X2_RF$NG) +
+   annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10000,label=X_stats_X2_RF$DB) +
+   annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=9000,label=X_stats_X2_RF$SM) +
+   theme_Publication()
+
+
+ # Box-plot versions of the same four panels, with the concentration as a factor.
+ # NOTE(review): the box-plot titles still read "Scatter ... with SD"; this
+ # looks copied from the scatter plots above. Confirm the intended wording.
+ L_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)) + geom_boxplot() +
+   scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
+   ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) +
coord_cartesian(ylim=c(0,160)) +
+   theme_Publication()
+
+ K_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)) + geom_boxplot() +
+   scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
+   ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) +
+   theme_Publication()
+
+ r_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)) + geom_boxplot() +
+   scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
+   ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) +
+   theme_Publication()
+
+ AUC_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)) + geom_boxplot() +
+   scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
+   ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(0,12500)) +
+   theme_Publication()
+
+
+ # First two PDF pages: the scatter summaries, then the box plots (2 x 2 each).
+ grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2)
+ grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2)
+
+
+
+ #plot the references
+ #grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4)
+ #grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2)
+
+ # Per-gene pages: three genes per page, one row per gene holding its
+ # L, K, r and AUC plots (4 columns x 3 rows). A last page with fewer than
+ # three genes is padded with Blank so the grid keeps its shape.
+ # This replaces the hand-unrolled paging, which produced the same pages but
+ # failed for num_genes <= 4: its loop ran over 1:(round(num_genes/3)-1), which
+ # counts downward in that case and indexes j[0].
+ # j holds the index of the first gene on each page: 1, 4, 7, ...
+ # The old scratch variables m, n and total_num are no longer created.
+ j <- 3 * (seq_len(ceiling(num_genes / 3)) - 1) + 1
+ for(num in j){
+   page_genes <- num:min(num + 2, num_genes)
+   page_plots <- list()
+   for(g in page_genes){
+     page_plots <- c(page_plots, list(p_l[[g]], p_K[[g]], p_r[[g]], p_AUC[[g]]))
+   }
+   # Fill the unused rows of a short final page.
+   while(length(page_plots) < 12){
+     page_plots <- c(page_plots, list(Blank))
+   }
+   grid.arrange(grobs = page_plots, ncol=4, nrow=3)
+ }
+ # Close InteractionPlots.pdf.
+ dev.off()
+
+
+
+ # Second PDF: the plots that follow are written to RF_InteractionPlots.pdf.
+ pdf(paste(outputpath,"RF_InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE)
+
+ X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise,
+                        mean_L = mean(l,na.rm=TRUE),
+                        median_L =
median(l,na.rm=TRUE), + max_L = max(l,na.rm=TRUE), + min_L = min(l,na.rm=TRUE), + sd_L = sd(l,na.rm=TRUE), + mean_K = mean(K,na.rm=TRUE), + median_K = median(K,na.rm=TRUE), + max_K = max(K,na.rm=TRUE), + min_K = min(K,na.rm=TRUE), + sd_K = sd(K,na.rm=TRUE), + mean_r = mean(r,na.rm=TRUE), + median_r = median(r,na.rm=TRUE), + max_r = max(r,na.rm=TRUE), + min_r = min(r,na.rm=TRUE), + sd_r = sd(r,na.rm=TRUE), + mean_AUC = mean(AUC,na.rm=TRUE), + median_AUC = median(AUC,na.rm=TRUE), + max_AUC = max(AUC,na.rm=TRUE), + min_AUC = min(AUC,na.rm=TRUE), + sd_AUC = sd(AUC,na.rm=TRUE), + NG = sum(NG,na.rm=TRUE), + DB = sum(DB,na.rm=TRUE), + SM = sum(SM,na.rm=TRUE) + ) + + + L_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,l)) + geom_point(position="jitter",size=1) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + + annotate("text",x=-0.25,y=10,label="NG") + + annotate("text",x=-0.25,y=5,label="DB") + + annotate("text",x=-0.25,y=0,label="SM") + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10,label=X_stats_X2_RF$NG) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=5,label=X_stats_X2_RF$DB) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=0,label=X_stats_X2_RF$SM) + + theme_Publication() + + K_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,K)) + geom_point(position="jitter",size=1) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = 
as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(-20,160)) + + annotate("text",x=-0.25,y=-5,label="NG") + + annotate("text",x=-0.25,y=-12.5,label="DB") + + annotate("text",x=-0.25,y=-20,label="SM") + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-5,label=X_stats_X2_RF$NG) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-12.5,label=X_stats_X2_RF$DB) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=-20,label=X_stats_X2_RF$SM) + + theme_Publication() + + R_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,r)) + geom_point(position="jitter",size=1) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + + annotate("text",x=-0.25,y=.9,label="NG") + + annotate("text",x=-0.25,y=.8,label="DB") + + annotate("text",x=-0.25,y=.7,label="SM") + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.9,label=X_stats_X2_RF$NG) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.8,label=X_stats_X2_RF$DB) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=.7,label=X_stats_X2_RF$SM) + + theme_Publication() + + AUC_Stats <- ggplot(X2_RF,aes(Conc_Num_Factor,AUC)) + geom_point(position="jitter",size=1) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + 
coord_cartesian(ylim=c(0,12500)) + + annotate("text",x=-0.25,y=11000,label="NG") + + annotate("text",x=-0.25,y=10000,label="DB") + + annotate("text",x=-0.25,y=9000,label="SM") + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=11000,label=X_stats_X2_RF$NG) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=10000,label=X_stats_X2_RF$DB) + + annotate("text",x=c(unique(X2_RF$Conc_Num_Factor)),y=9000,label=X_stats_X2_RF$SM) + + theme_Publication() + + + L_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)) + geom_boxplot() + + scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for L with SD", sep = " ")) + coord_cartesian(ylim=c(0,130)) + + theme_Publication() + + K_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)) + geom_boxplot() + + scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for K with SD", sep = " ")) + coord_cartesian(ylim=c(0,160)) + + theme_Publication() + + r_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)) + geom_boxplot() + + scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for r with SD", sep = " ")) + coord_cartesian(ylim=c(0,1)) + + theme_Publication() + + AUC_Stats_Box <- ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)) + geom_boxplot() + + scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) + + ggtitle(paste(s,"Scatter RF for AUC with SD", sep = " ")) + coord_cartesian(ylim=c(12000,0)) + + theme_Publication() + + + grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2) + grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2) + + + + #plot the references + 
#grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4)
#grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2)

# ---- Per-gene reference plots, three genes per 4x3 page -----------------------
# Draw one page: a row of (L, K, r, AUC) panels for each index in `gene_idx`
# (at most three indices); unused cells are filled with `Blank`.
draw_rf_page <- function(gene_idx) {
  panels <- list()
  for (g in gene_idx) {
    panels <- c(panels, list(p_rf_l[[g]], p_rf_K[[g]], p_rf_r[[g]], p_rf_AUC[[g]]))
  }
  while (length(panels) < 12) {
    panels <- c(panels, list(Blank))
  }
  do.call(grid.arrange, c(panels, list(ncol = 4, nrow = 3)))
}

# Genes 1..num_genes_RF are printed in order, three per page, with the last
# page padded by Blank. This yields the same pages as the former
# "full pages + remainder cascade", and also works when there are too few
# genes for a single full page, where the former 1:(round(n/3)-1) loop counted
# down to index 0 and failed.
rf_gene_idx <- seq_len(num_genes_RF)
for (page_idx in split(rf_gene_idx, ceiling(rf_gene_idx / 3))) {
  draw_rf_page(page_idx)
}
dev.off()

#print rank plots for L and K gene interactions

# Work on a copy in which missing average Z-scores are replaced by a small
# placeholder (0.001) so that every row receives a rank.
InteractionScores_AdjustMissing <- InteractionScores
for (z_col in c("Avg_Zscore_L", "Avg_Zscore_K", "Avg_Zscore_r", "Avg_Zscore_AUC")) {
  # Vector-level assignment: unlike df[rows, ]$col <- value, this does not
  # raise an error when the column contains no NA at all.
  is_missing <- is.na(InteractionScores_AdjustMissing[[z_col]])
  InteractionScores_AdjustMissing[[z_col]][is_missing] <- 0.001
}

# Rank of each row by its (NA-adjusted) average Z-score, per growth parameter.
InteractionScores_AdjustMissing$L_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_L)
InteractionScores_AdjustMissing$K_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_K)
InteractionScores_AdjustMissing$r_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_r)
InteractionScores_AdjustMissing$AUC_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_AUC)

#
# ---- lm-based Z-scores: fill missing values and rank --------------------------
# Missing interaction (lm) Z-scores are replaced by a small placeholder (0.001)
# so that every row receives a rank.
for (z_col in c("Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC")) {
  # Vector-level assignment: unlike df[rows, ]$col <- value, this does not
  # raise an error when the column contains no NA at all.
  is_missing <- is.na(InteractionScores_AdjustMissing[[z_col]])
  InteractionScores_AdjustMissing[[z_col]][is_missing] <- 0.001
}

InteractionScores_AdjustMissing$L_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_L)
InteractionScores_AdjustMissing$K_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_K)
InteractionScores_AdjustMissing$r_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_r)
InteractionScores_AdjustMissing$AUC_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_AUC)


# ---- Rank plots ---------------------------------------------------------------
# Build one "Z score vs. rank" plot.
#   scores       data frame holding the rank and Z-score columns
#   rank_col     name of the rank column (x axis)
#   z_col        name of the Z-score column (y axis)
#   sd_cut       threshold (1, 2 or 3): the regions above +sd_cut and below
#                -sd_cut are shaded and marked with horizontal lines
#   title_prefix / ylab_prefix   text placed before the trait in title / y label
#   trait        "L" or "K"
#   show_counts  if TRUE, annotate the number of rows beyond each threshold
# For L the rows at or above +sd_cut are labelled "Deletion Enhancers" and the
# rows at or below -sd_cut "Deletion Suppressors"; for K the labels are swapped.
make_rank_plot <- function(scores, rank_col, z_col, sd_cut, title_prefix, ylab_prefix, trait,
                           show_counts = TRUE) {
  force(rank_col)
  force(z_col)
  p <- ggplot(scores, aes(.data[[rank_col]], .data[[z_col]])) +
    ggtitle(paste(title_prefix, " vs. Rank for ", trait, " above ", sd_cut, "SD", sep = "")) +
    xlab("Rank") + ylab(paste(ylab_prefix, trait)) +
    annotate("rect",xmin = -Inf, xmax = Inf, ymin = sd_cut,ymax = Inf,fill="#542788", alpha=0.3) +
    annotate("rect",xmin = -Inf, xmax = Inf, ymin = -sd_cut,ymax = -Inf,fill="orange", alpha=0.3) +
    geom_hline(yintercept=c(-sd_cut,sd_cut)) + geom_point(size=0.1,shape=3)
  if (show_counts) {
    x_mid <- dim(scores)[1]/2
    n_high <- dim(scores[scores[[z_col]] >= sd_cut,])[1]
    n_low <- dim(scores[scores[[z_col]] <= -sd_cut,])[1]
    if (identical(trait, "L")) {
      p <- p +
        annotate("text",x=x_mid,y=10,label=paste("Deletion Enhancers =",n_high)) +
        annotate("text",x=x_mid,y=-10,label=paste("Deletion Suppressors =",n_low))
    } else {
      p <- p +
        annotate("text",x=x_mid,y=-10,label=paste("Deletion Enhancers =",n_low)) +
        annotate("text",x=x_mid,y=10,label=paste("Deletion Suppressors =",n_high))
    }
  }
  p + theme_Publication()
}

# Average-Z-score variant (columns <trait>_Rank and Avg_Zscore_<trait>).
avg_rank_plot <- function(trait, sd_cut, show_counts = TRUE) {
  make_rank_plot(InteractionScores_AdjustMissing,
                 paste(trait, "_Rank", sep = ""), paste("Avg_Zscore_", trait, sep = ""),
                 sd_cut, "Average Z score", "Avg Z score", trait, show_counts)
}

# lm interaction-Z-score variant (columns <trait>_Rank_lm and Z_lm_<trait>).
lm_rank_plot <- function(trait, sd_cut, show_counts = TRUE) {
  make_rank_plot(InteractionScores_AdjustMissing,
                 paste(trait, "_Rank_lm", sep = ""), paste("Z_lm_", trait, sep = ""),
                 sd_cut, "Interaction Z score", "Int Z score", trait, show_counts)
}

Rank_L_1SD <- avg_rank_plot("L", 1)
Rank_L_2SD <- avg_rank_plot("L", 2)
Rank_L_3SD <- avg_rank_plot("L", 3)

Rank_K_1SD <- avg_rank_plot("K", 1)
Rank_K_2SD <- avg_rank_plot("K", 2)
Rank_K_3SD <- avg_rank_plot("K", 3)

Rank_L_1SD_notext <- avg_rank_plot("L", 1, show_counts = FALSE)
Rank_L_2SD_notext <- avg_rank_plot("L", 2, show_counts = FALSE)
Rank_L_3SD_notext <- avg_rank_plot("L", 3, show_counts = FALSE)

Rank_K_1SD_notext <- avg_rank_plot("K", 1, show_counts = FALSE)
Rank_K_2SD_notext <- avg_rank_plot("K", 2, show_counts = FALSE)
Rank_K_3SD_notext <- avg_rank_plot("K", 3, show_counts = FALSE)

# Page 1: annotated plots; page 2: the same plots without count annotations.
pdf(paste(outputpath,"RankPlots.pdf",sep=""),width = 18, height = 12, onefile = TRUE)

grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2)
grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2)

dev.off()


Rank_L_1SD_lm <- lm_rank_plot("L", 1)
Rank_L_2SD_lm <- lm_rank_plot("L", 2)
Rank_L_3SD_lm <- lm_rank_plot("L", 3)

Rank_K_1SD_lm <- lm_rank_plot("K", 1)
Rank_K_2SD_lm <- lm_rank_plot("K", 2)
Rank_K_3SD_lm <- lm_rank_plot("K", 3)

Rank_L_1SD_notext_lm <- lm_rank_plot("L", 1, show_counts = FALSE)
Rank_L_2SD_notext_lm <- lm_rank_plot("L", 2, show_counts = FALSE)
Rank_L_3SD_notext_lm <- lm_rank_plot("L", 3, show_counts = FALSE)

Rank_K_1SD_notext_lm <- lm_rank_plot("K", 1, show_counts = FALSE)
Rank_K_2SD_notext_lm <- lm_rank_plot("K", 2, show_counts = FALSE)
Rank_K_3SD_notext_lm <- lm_rank_plot("K", 3, show_counts = FALSE)

pdf(paste(outputpath,"RankPlots_lm.pdf",sep=""),width = 18, height = 12, onefile = TRUE)

grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2)
grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2)

dev.off()


# ---- Agreement between lm Z-scores and average Z-scores (L) -------------------
# Keep rows where at least one of the two L scores is present.
X_NArm <- InteractionScores[!is.na(InteractionScores$Z_lm_L) | !is.na(InteractionScores$Avg_Zscore_L) ,]

#find overlaps
# Rules are applied in order, so a later rule overrides an earlier one for rows
# matching both. which() drops rows where a comparison is NA (one score missing)
# and is a no-op when nothing matches; the former try(df[cond,]$Overlap <- ...)
# form failed in both situations and silently skipped the entire category.
X_NArm$Overlap <- "No Effect"
lm_z_L <- X_NArm$Z_lm_L
avg_z_L <- X_NArm$Avg_Zscore_L
X_NArm$Overlap[which(lm_z_L >= 2 & avg_z_L >= 2)] <- "Deletion Enhancer Both"
X_NArm$Overlap[which(lm_z_L <= -2 & avg_z_L <= -2)] <- "Deletion Suppressor Both"
X_NArm$Overlap[which(lm_z_L >= 2 & avg_z_L <= 2)] <- "Deletion Enhancer lm only"
X_NArm$Overlap[which(lm_z_L <= 2 & avg_z_L >= 2)] <- "Deletion Enhancer Avg Zscore only"
X_NArm$Overlap[which(lm_z_L <= -2 & avg_z_L >= -2)] <- "Deletion Suppressor lm only"
X_NArm$Overlap[which(lm_z_L >= -2 & avg_z_L <= -2)] <- "Deletion Suppressor Avg Zscore only"
X_NArm$Overlap[which(lm_z_L >= 2 & avg_z_L <= -2)] <- "Deletion Enhancer lm, Deletion Suppressor Avg Z score"
X_NArm$Overlap[which(lm_z_L <= -2 & avg_z_L >= 2)] <- "Deletion Suppressor lm, Deletion Enhancer Avg Z score"

#get the linear model info and the r-squared value for all CPPs in results 1 vs results 2
get_lm_L <- lm(X_NArm$Z_lm_L~X_NArm$Avg_Zscore_L)
L_lm <- summary(get_lm_L)

get_lm_K <- lm(X_NArm$Z_lm_K~X_NArm$Avg_Zscore_K)
+ K_lm <- summary(get_lm_K) + + get_lm_r <- lm(X_NArm$Z_lm_r~X_NArm$Avg_Zscore_r) + r_lm <- summary(get_lm_r) + + get_lm_AUC <- lm(X_NArm$Z_lm_AUC~X_NArm$Avg_Zscore_AUC) + AUC_lm <- summary(get_lm_AUC) + + pdf(paste(outputpath,"Avg_Zscore_vs_lm_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) + + print(ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Avg Zscore vs lm L") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(Avg_Zscore_K,Z_lm_K)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Avg Zscore vs lm K") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(K_lm$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(Avg_Zscore_r,Z_lm_r)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Avg Zscore vs lm r") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(r_lm$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(Avg_Zscore_AUC,Z_lm_AUC)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Avg Zscore vs lm AUC") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(AUC_lm$r.squared,2))) + theme_Publication_legend_right()) + + dev.off() + + + lm_v_Zscore_L <- 
ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L,ORF=OrfRep,Gene=Gene,NG=NG,SM=SM,DB=DB)) + geom_point(aes(color=Overlap),shape=3) + + geom_smooth(method = "lm",color=1) + ggtitle("Avg Zscore vs lm L") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right() + + pgg <- ggplotly(lm_v_Zscore_L) + #pgg + plotly_path <- paste(getwd(),"/",outputpath,"Avg_Zscore_vs_lm_NA_rm.html",sep="") + saveWidget(pgg, file=plotly_path, selfcontained =TRUE) + + X_NArm$L_Rank <- rank(X_NArm$Avg_Zscore_L) + X_NArm$K_Rank <- rank(X_NArm$Avg_Zscore_K) + X_NArm$r_Rank <- rank(X_NArm$Avg_Zscore_r) + X_NArm$AUC_Rank <- rank(X_NArm$Avg_Zscore_AUC) + + X_NArm$L_Rank_lm <- rank(X_NArm$Z_lm_L) + X_NArm$K_Rank_lm <- rank(X_NArm$Z_lm_K) + X_NArm$r_Rank_lm <- rank(X_NArm$Z_lm_r) + X_NArm$AUC_Rank_lm <- rank(X_NArm$Z_lm_AUC) + + #get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 + get_lm_L2 <- lm(X_NArm$L_Rank_lm~X_NArm$L_Rank) + L_lm2 <- summary(get_lm_L2) + + get_lm_K2 <- lm(X_NArm$K_Rank_lm~X_NArm$K_Rank) + K_lm2 <- summary(get_lm_K2) + + get_lm_r2 <- lm(X_NArm$r_Rank_lm~X_NArm$r_Rank) + r_lm2 <- summary(get_lm_r2) + + get_lm_AUC2 <- lm(X_NArm$AUC_Rank_lm~X_NArm$AUC_Rank) + AUC_lm2 <- summary(get_lm_AUC2) + + num_genes_NArm2 <- (dim(X_NArm)[1])/2 + + pdf(paste(outputpath,"Avg_Zscore_vs_lm_ranked_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) + + print(ggplot(X_NArm,aes(L_Rank,L_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Rank Avg Zscore vs lm L") + + annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(L_lm2$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(K_Rank,K_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + 
ggtitle("Rank Avg Zscore vs lm K") + + annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(K_lm2$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(r_Rank,r_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Rank Avg Zscore vs lm r") + + annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(r_lm2$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(AUC_Rank,AUC_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Rank of Avg Zscore vs lm AUC") + + annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(AUC_lm2$r.squared,2))) + theme_Publication_legend_right()) + + + + dev.off() + + + + Rank_L_1SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 1,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -1,])[1])) + + theme_Publication() + + Rank_L_2SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. 
Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 2,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -2,])[1])) + + theme_Publication() + + Rank_L_3SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 3,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -3,])[1])) + + theme_Publication() + + + Rank_K_1SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. 
Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -1,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 1,])[1])) + + theme_Publication() + + Rank_K_2SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -2,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 2,])[1])) + + theme_Publication() + + Rank_K_3SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -3,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 3,])[1])) + + theme_Publication() + + + Rank_L_1SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_L_2SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_L_3SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. 
Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + + Rank_K_1SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_K_2SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_K_3SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + + pdf(paste(outputpath,"RankPlots_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) + + grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2) + grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2) + + dev.off() + + + Rank_L_1SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 1,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -1,])[1])) + + theme_Publication() + + Rank_L_2SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. 
Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 2,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -2,])[1])) + + theme_Publication() + + Rank_L_3SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 3,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -3,])[1])) + + theme_Publication() + + + Rank_K_1SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. 
Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -1,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 1,])[1])) + + theme_Publication() + + Rank_K_2SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -2,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 2,])[1])) + + theme_Publication() + + Rank_K_3SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -3,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 3,])[1])) + + theme_Publication() + + + Rank_L_1SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_L_2SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_L_3SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. 
Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + + Rank_K_1SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_K_2SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_K_3SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + pdf(paste(outputpath,"RankPlots_lm_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) + + grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2) + grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2) + + dev.off() + +} + + + +#get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 +get_lm_1 <- lm(X_NArm$Z_lm_K~X_NArm$Z_lm_L) +L_lm_1 <- summary(get_lm_1) + +get_lm_2 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_L) +L_lm_2 <- summary(get_lm_2) + +get_lm_3 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_L) +L_lm_3 <- summary(get_lm_3) + +get_lm_4 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_K) +L_lm_4 <- summary(get_lm_4) + +get_lm_5 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_K) +L_lm_5 <- summary(get_lm_5) + +get_lm_6 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_r) +L_lm_6 <- summary(get_lm_6) + + +pdf(file=paste(outputpath,"Correlation_CPPs.pdf",sep=""),width = 10, height = 7, onefile = TRUE) + +ggplot(X_NArm,aes(Z_lm_L,Z_lm_K)) + geom_point(shape=3,color="gray70") + + geom_smooth(method="lm",color="tomato3") + + ggtitle("Interaction L vs. 
Interaction K") + + xlab("z-score L") + ylab("z-score K") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_1$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_L,Z_lm_r)) + geom_point(shape=3,color="gray70") + + geom_smooth(method="lm",color="tomato3") + + ggtitle("Interaction L vs. Interaction r") + + xlab("z-score L") + ylab("z-score r") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_2$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_L,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + + geom_smooth(method="lm",color="tomato3") + + ggtitle("Interaction L vs. Interaction AUC") + + xlab("z-score L") + ylab("z-score AUC") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_3$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_K,Z_lm_r)) + geom_point(shape=3,color="gray70") + + geom_smooth(method="lm",color="tomato3") + + ggtitle("Interaction K vs. 
Interaction r") + + xlab("z-score K") + ylab("z-score r") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_4$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_K,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + + geom_smooth(method="lm",color="tomato3") + + ggtitle("Interaction K vs. Interaction AUC") + + xlab("z-score K") + ylab("z-score AUC") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_5$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_r,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + + geom_smooth(method="lm",color="tomato3") + + ggtitle("Interaction r vs. Interaction AUC") + + xlab("z-score r") + ylab("z-score AUC") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_6$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +InteractionScores_RF2 <- InteractionScores_RF[!is.na(InteractionScores_RF$Z_lm_L),] +ggplot(X_NArm,aes(Z_lm_L,Z_lm_K)) + geom_point(shape=3,color="gray70") + + geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_K),color="cyan") + + ggtitle("Interaction L vs. 
Interaction K") + + xlab("z-score L") + ylab("z-score K") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_1$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_L,Z_lm_r)) + geom_point(shape=3,color="gray70") + + geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_r),color="cyan") + + ggtitle("Interaction L vs. Interaction r") + + xlab("z-score L") + ylab("z-score r") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_2$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_L,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + + geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_AUC),color="cyan") + + ggtitle("Interaction L vs. Interaction AUC") + + xlab("z-score L") + ylab("z-score AUC") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_3$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_K,Z_lm_r)) + geom_point(shape=3,color="gray70") + + geom_point(data=InteractionScores_RF2,aes(Z_lm_K,Z_lm_r),color="cyan") + + ggtitle("Interaction K vs. 
Interaction r") + + xlab("z-score K") + ylab("z-score r") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_4$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_K,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + + geom_point(data=InteractionScores_RF2,aes(Z_lm_K,Z_lm_AUC),color="cyan") + + ggtitle("Interaction K vs. Interaction AUC") + + xlab("z-score K") + ylab("z-score AUC") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_5$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_r,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + + geom_point(data=InteractionScores_RF2,aes(Z_lm_r,Z_lm_AUC),color="cyan") + + ggtitle("Interaction r vs. Interaction AUC") + + xlab("z-score r") + ylab("z-score AUC") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_6$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + + + + + +dev.off() + + +#write.csv(Labels,file=paste("../Code/Parameters.csv"),row.names = FALSE) +timestamp() + +#BoneYard*********************************************** +#I'm thinking this parameter needs to be save somewhere "permanent' for the record so outputs can be reproduced. +#take this out of the Arguments. In Matlab I could for future in .mat file. 
Maybe I could save the SD Args[2] as part of the StudyInfo.txt. +#Corruptable but better than nothing. +#if(is.na(Args[2])){ +# std=3 +#}else { +# std= Arg[2] +#Delta_Background_sdFactor <- 2 #Args[3] +#DelBGFactr <- as.numeric(Delta_Background_sdFactor) +#} + diff --git a/workflow/.old/templates/qhtcp/Exp4/.DS_Store b/workflow/.old/templates/qhtcp/Exp4/.DS_Store new file mode 100644 index 00000000..407f212c Binary files /dev/null and b/workflow/.old/templates/qhtcp/Exp4/.DS_Store differ diff --git a/workflow/.old/templates/qhtcp/Exp4/ExpFrontend.m b/workflow/.old/templates/qhtcp/Exp4/ExpFrontend.m new file mode 100644 index 00000000..2bdc98c3 --- /dev/null +++ b/workflow/.old/templates/qhtcp/Exp4/ExpFrontend.m @@ -0,0 +1,53 @@ +%FrontEnd utility to copy source result sheet into Exp_ folders of +%StudiesQHTCP/StudyName/Exp1(2,3,4). This allow the automation of path +%capture to the StudiesDataArchieve.txt study log. +%Select, copy and Capture Study Exp_ details to study log + +%Exp meta data collection +W=pwd; +%Load matlab stored data file into workspace +try +ExpLabel= strcat('Exp',W(end)) +questdlg('\fontsize{20} Select the !!Results File','File Selection','OK', struct('Default','OK','Interpreter','tex')); +[resFile,resPath]= uigetfile('*.txt') +copyfile((fullfile(resPath,resFile)),fullfile(W)) +resDate= char(regexp(resFile, '(\d\d\_\d\d\d\d)|( \d\d\_\d\d\d\d|\d\d\d\d\d\d)','match')) +cd .. +Wstudy= pwd +studyDate= datetime("today"); +S.sDate(1) = {studyDate}; +if ispc + lastSep=max(strfind(Wstudy,'\')) + studyName= Wstudy((lastSep+1):end) +else + lastSep=max(strfind(Wstudy,'/')) + studyName= Wstudy((lastSep+1):end) +end + +S.sDate(1)= {studyDate}; +S.sName(1)= {studyName} +S.sPath(1)= {Wstudy} +S.ELabel(1)= {ExpLabel} +S.EresDate(1)= {resDate} +S.EresFile(1)= {resFile} +S.EresPath(1)= {resPath} + +cd .. 
+ +fid = fopen('StudiesDataArchive.txt','a'); +fprintf(fid,'StudyDate\tStudyName\tStudyPath\tExpNum\tExpDate\tExpPath\tResultFile\n'); +fprintf(fid, '%s\t %s\t %s\t %s\t %s\t %s\t %s \n',S.sDate{1},S.sName{1},S.sPath{1},S.ELabel{1},S.EresDate{1},S.EresPath{1},S.EresFile{1}); +fclose(fid); +%save((fullfile(pwd,'studyLog.mat')), 'S') +fclose('all'); + +catch + cd(W) + disp('Error: Unable to Execute ExpFrontend.m') +end + + + + + + diff --git a/workflow/.old/templates/qhtcp/Exp4/NotesExp4 b/workflow/.old/templates/qhtcp/Exp4/NotesExp4 new file mode 100644 index 00000000..e69de29b diff --git a/workflow/.old/templates/qhtcp/Exp4/Z_InteractionTemplate.R b/workflow/.old/templates/qhtcp/Exp4/Z_InteractionTemplate.R new file mode 100644 index 00000000..fe31ca14 --- /dev/null +++ b/workflow/.old/templates/qhtcp/Exp4/Z_InteractionTemplate.R @@ -0,0 +1,2730 @@ +#Based on InteractionTemplate.R which is based on Sean Santose's Interaction_V5 script. +#Adapt SS For Structured Data storage but using command line scripts +###Set up the required libraries, call required plot theme elements and set up the command line arguments +library("ggplot2") +library("plyr") +library("extrafont") +library("gridExtra") +library("gplots") +library("RColorBrewer") +library("stringr") +#library("gdata") +library(plotly) +library(htmlwidgets) + +Args <- commandArgs(TRUE) +input_file <- Args[1] #"!!Results_17_0827_yor1null-rpl12anull misLabeledAsFrom MI 17_0919_yor1-curated.txt" #Args[1] #Arg 1 #"!!ResultsStd_JS 19_1224_HLEG_P53.txt" is the !!results ... 
.txt +#Tool to find a file and copy it to desired location +destDir= getwd() +#srcFile= file.choose() +#file.copy(srcFile, destDir) +#input_file= tail(strsplit(srcFile,"[/]")[[1]],1) + + + +#Path to Output Directory +#W=getwd() #R is F'd up, Can't use, Any legitamate platform could build out dirs from this +outDir <- "ZScores/" #"Args[2] #paste0(W,"/ZScores/") +subDir <- outDir #Args[2] + +if (file.exists(subDir)){ + outputpath <- subDir +} else { + dir.create(file.path(subDir)) +} + +if (file.exists(paste(subDir,"QC/",sep=""))){ + outputpath_QC <- paste(subDir,"QC/",sep="") +} else { + dir.create(file.path(paste(subDir,"QC/",sep=""))) + outputpath_QC <- paste(subDir,"QC/",sep="") +} +#define the output path (formerly the second argument from Rscript) +outputpath <- outDir + +#Set Args[2] the Background contamination noise filter as a function of standard deviation +#std= as.numeric(Args[2]) +#Sean recommends 3 or 5 SD factor. +#Capture Exp_ number,use it to Save Args[2]{std}to Labels field and then Write to Labels to studyInfo.txt for future reference +Labels <- read.csv(file= "../Code/StudyInfo.csv",stringsAsFactors = FALSE) #,sep= ",") +print("Be sure to enter Background noise filter standard deviation i.e., 3 or 5 per Sean") + +#User prompt for std multiplier Value +cat("Enter a Standard Deviation value to noise filter \n") +inpChar<- readLines(file("stdin"), n = 1L) +cat(paste("Standard Deviation Value is", inpChar, "\n")) +inpNum= as.numeric(inpChar) +#set std deviation multiplier default if no user entry +if(!is.na(inpNum)){ + std= inpNum +}else{std= 3} + + +expNumber<- as.numeric(sub("^.*?(\\d+)$", "\\1", getwd())) +Labels[expNumber,3]= as.numeric(std) +Delta_Background_sdFactor <- std +DelBGFactr <- as.numeric(Delta_Background_sdFactor) +#Write Background SD value to studyInfo.txt file +#write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) +write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) +print('ln 50 write 
StudyInfo.csv ') +#write.table(Labels,file=paste(outputpath,"StudyInfo.txt"),sep = "\t",row.names = FALSE) + +#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +#++++++BEGIN USER DATA SELECTION SECTION+++++++++++++++++++++++++++++++++++++++++++++++++ + +#read in the data +X <- read.delim(input_file,skip=2,as.is=T,row.names=1,strip.white=TRUE) +X <- X[!(X[[1]]%in%c("","Scan")),] +#X <- X[!(X[[1]]%in%c(61:76)),] #Remove dAmp plates which are Scans 61 thru 76 + +#X <- X[which(X$Specifics == "WT"),] + +#X_length <- length(X[1,]) +#X_end <- length(X[1,]) - 2 +#X <- X[,c(1:42,X_end:X_length)] + + +#use numeric data to perform operations +X$Col <- as.numeric(X$Col) +X$Row <- as.numeric(X$Row) +X$l <- as.numeric(X$l) +X$K <- as.numeric(X$K) +X$r <- as.numeric(X$r) +X$Scan <- as.numeric(X$Scan) +X$AUC <- as.numeric(X$AUC) +X$LstBackgrd <- as.numeric(X$LstBackgrd) +X$X1stBackgrd <- as.numeric(X$X1stBackgrd) + +#Sometimes the an experimenter may have placed the non-varying drug in the 'Drug' col instead of the 'Modifier1' col +#as was the case in Gemcitabin and Cytarabin experiments. +#The following allows user to rename columns so as to get the appropriate +#data where it needs to be for the script to run properly. +#colnames(X)[7] <- "Modifier1" +#colnames(X)[8] <- "Conc1" +#colnames(X)[10] <- "Drug" +#colnames(X)[11] <- "Conc" + +#set the OrfRep to YDL227C for the ref data +X[X$ORF == "YDL227C",]$OrfRep <- "YDL227C" +#Sean removes the Doxycyclin at 0.0ug.mL so that only the Oligomycin series with Doxycyclin of 0.12ug/mL are used. +#That is the first DM plates are removed from the data set with the following. +#X <- X[X$Conc1 != "0ug/ml",] #This removes data with dox ==0 leaving gene expression on with four different concentrations of Gemcytabin +X <- X[X$Drug != "BMH21",] #This removes data concerning BMH21 for this experiment + +#Mert placed the"bad_spot" text in the ORF col. for particular spots in the RF1 and RF2 plates. 
+#This code removes those spots from the data set used for the interaction analysis. Dr. Hartman feels that these do not affect Z-scores significantly, and so the "non-curated" files were used.
+#try(X <- X[X$ORF != "bad_spot",])
+#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++=
+
+#Count the distinct drug concentration labels present in the data
+Total_Conc_Nums <- length(unique(X$Conc))
+
+#numextract: return the first numeric token (optional minus sign(s), digits, optional decimal part) found in each element of 'string', i.e. the numeric drug concentration; str_extract yields NA where no digit is present
+numextract <- function(string){
+  str_extract(string, "\\-*\\d+\\.*\\d*")
+}
+
+#Generate a new column with the numeric drug concentrations
+X$Conc_Num <- as.numeric(numextract(X$Conc))
+#Generate a new column coding the numeric drug concentrations as factor levels starting at 0 (lowest concentration = 0) for the graphing later
+X$Conc_Num_Factor <- as.numeric(as.factor(X$Conc_Num)) - 1
+
+#Get the max factor for concentration
+MAX_CONC <- max(X$Conc_Num_Factor)
+#If treating the numbers not as factors, uncomment the next line and comment out the previous line
+#MAX_CONC <- max(X$Conc_Num)
+
+#Remove blank wells so they are excluded from graphs and summary statistics. %in% is used rather than != because it never returns NA: a row with a missing Gene/ORF is kept as-is instead of being turned into an all-NA row by the subsetting
+X <- X[!(X$Gene %in% "BLANK"),]
+X <- X[!(X$Gene %in% "Blank"),]
+X <- X[!(X$ORF %in% "Blank"),]
+X <- X[!(X$Gene %in% "blank"),]
+#X <- X[X$Gene != "HO",]
+Xbu= X
+#Inserted to use the SGD gene list to update ORFs and replace empty geneName cells with the ORF name (adapted from Sean's Merge script). This is to 'fix' the naming for everything that follows (REMc, Heatmaps, et al.) rather than do it piecemeal later
+#Sean's Match script (which was adapted here) was fixed 2022_0608 so as not to write over the RF1 & RF2 geneNames, which caused a variance with his code results
+#in the Z_lm_L, K, r & AUC output values. Values correlated well but were off by a multiplier factor.
+SGDgeneList= "../Code/SGD_features.tab" +genes = data.frame(read.delim(file=SGDgeneList,quote="",header=FALSE,colClasses = c(rep("NULL",3), rep("character", 2), rep("NULL", 11)))) +for(i in 1:length(X[,14])){ + ii= as.numeric(i) + line_num = match(X[ii,14],genes[,1],nomatch=1) + OrfRepColNum= as.numeric(match('OrfRep',names(X))) + if(X[ii,OrfRepColNum]!= "YDL227C"){ + X[ii,15] = genes[line_num,2] + } + if((X[ii,15] == "")||(X[ii,15] == "OCT1")){ + X[ii,15] = X[ii,OrfRepColNum] + } +} +Xblankreplace= X +#X= Xbu #for restore testing restore X if geneName 'Match' routine needs changing + +#Remove dAmPs ******************************* +#jlh confirmed to leave dAmps in so comment out this section +#DAmPs_List <- "../Code/22_0602_Remy_DAmPsList.txt" +#Damps <- read.delim(DAmPs_List,header=F) + +#X <- X[!(X$ORF %in% Damps$V1),] #fix this to Damps[,1] +#XafterDampsRM=X #Backup for debugging especially when Rstudio goes crazy out of control +# *********** + + +#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +#++++++END USER DATA SELECTION+++++++++++++++++++++++++++++++++++++++++++++++++ +#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +print("ln137 End of User Section including blank gene writeOver") +#++++Begin Graphics Boiler Plate Section+++++++++++++++++++++++++++++++++++++++ +#theme elements for plots +theme_Publication <- function(base_size=14, base_family="sans") { + library(grid) + library(ggthemes) + (theme_foundation(base_size=base_size, base_family=base_family) + + theme(plot.title = element_text(face = "bold", + size = rel(1.2), hjust = 0.5), + text = element_text(), + panel.background = element_rect(colour = NA), + plot.background = element_rect(colour = NA), + panel.border = element_rect(colour = NA), + axis.title = element_text(face = "bold",size = rel(1)), + axis.title.y = element_text(angle=90,vjust =2), + axis.title.x = element_text(vjust = -0.2), + axis.text = element_text(), + axis.line = 
element_line(colour="black"), + axis.ticks = element_line(), + panel.grid.major = element_line(colour="#f0f0f0"), + panel.grid.minor = element_blank(), + legend.key = element_rect(colour = NA), + legend.position = "bottom", + legend.direction = "horizontal", + legend.key.size= unit(0.2, "cm"), + legend.spacing = unit(0, "cm"), + legend.title = element_text(face="italic"), + plot.margin=unit(c(10,5,5,5),"mm"), + strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"), + strip.text = element_text(face="bold") + )) + +} + +scale_fill_Publication <- function(...){ + library(scales) + discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) + +} + +scale_colour_Publication <- function(...){ + discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) + +} + + +theme_Publication_legend_right <- function(base_size=14, base_family="sans") { + (theme_foundation(base_size=base_size, base_family=base_family) + + theme(plot.title = element_text(face = "bold", + size = rel(1.2), hjust = 0.5), + text = element_text(), + panel.background = element_rect(colour = NA), + plot.background = element_rect(colour = NA), + panel.border = element_rect(colour = NA), + axis.title = element_text(face = "bold",size = rel(1)), + axis.title.y = element_text(angle=90,vjust =2), + axis.title.x = element_text(vjust = -0.2), + axis.text = element_text(), + axis.line = element_line(colour="black"), + axis.ticks = element_line(), + panel.grid.major = element_line(colour="#f0f0f0"), + panel.grid.minor = element_blank(), + legend.key = element_rect(colour = NA), + legend.position = "right", + legend.direction = "vertical", + legend.key.size= unit(0.5, "cm"), + legend.spacing = unit(0, "cm"), + legend.title = element_text(face="italic"), + plot.margin=unit(c(10,5,5,5),"mm"), + 
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"), + strip.text = element_text(face="bold") + )) + +} + +scale_fill_Publication <- function(...){ + discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) + +} + +scale_colour_Publication <- function(...){ + discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) + +} + + +#print timestamp for initial time the code starts +timestamp() +#+++++BEGIN QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++ +###Part 2 - Quality control +#print quality control graphs for each dataset before removing data due to contamination +#and before adjusting missing data to max theoretical values + +#plate analysis plot +#plate analysis is a quality check to identify plate effects containing anomalies + +Plate_Analysis_L <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication() + +Plate_Analysis_K <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication() + +Plate_Analysis_r <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom 
= "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_Publication() + +Plate_Analysis_AUC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication() + + + +Plate_Analysis_L_Box <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication() + +Plate_Analysis_K_Box <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication() + +Plate_Analysis_r_Box <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_Publication() + +Plate_Analysis_AUC_Box <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication() + +#quality control - values with a high delta background likely have heavy contamination +#check the frequency of these values +#report the L and K values of these spots +#report the number to be removed based on the Delta_Background_Tolerance +X$Delta_Backgrd <- X$LstBackgrd - X$X1stBackgrd + + +#raw l vs K before QC +Raw_l_vs_K_beforeQC <- ggplot(X,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + + ggtitle("Raw L vs K before QC") + + theme_Publication_legend_right() +pdf(paste(outputpath_QC,"Raw_L_vs_K_beforeQC.pdf",sep=""),width = 12,height = 8) +Raw_l_vs_K_beforeQC +dev.off() +pgg <- ggplotly(Raw_l_vs_K_beforeQC) +#pgg 
+plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_beforeQC.html",sep="") +saveWidget(pgg, file=plotly_path, selfcontained =TRUE) + + +#set delta background tolerance based on 3 sds from the mean delta background +Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(DelBGFactr*sd(X$Delta_Backgrd)) +#Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(3*sd(X$Delta_Backgrd)) +print(paste("Delta_Background_Tolerance is",Delta_Background_Tolerance,sep=" ")) + +Plate_Analysis_Delta_Backgrd <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2,position="jitter") + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication() + +Plate_Analysis_Delta_Backgrd_Box <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication() + + + +X_Delta_Backgrd_above_Tolerance <- X[X$Delta_Backgrd >= Delta_Background_Tolerance,] + +X_Delta_Backgrd_above_Tolerance_K_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$K,na.rm = TRUE))/2 +X_Delta_Backgrd_above_Tolerance_L_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$l,na.rm = TRUE))/2 +X_Delta_Backgrd_above_Tolerance_toRemove <- dim(X_Delta_Backgrd_above_Tolerance)[1] + +X_Delta_Backgrd_above_Tolerance_L_vs_K <- ggplot(X_Delta_Backgrd_above_Tolerance,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + + ggtitle(paste("Raw L vs K for strains above delta background threshold of",Delta_Background_Tolerance,"or above")) + + annotate("text",x=X_Delta_Backgrd_above_Tolerance_L_halfmedian,y=X_Delta_Backgrd_above_Tolerance_K_halfmedian, + label = paste("Strains above delta 
background tolerance = ",X_Delta_Backgrd_above_Tolerance_toRemove)) + + theme_Publication_legend_right() +pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.pdf",sep=""),width = 12,height = 8) +X_Delta_Backgrd_above_Tolerance_L_vs_K +dev.off() +pgg <- ggplotly(X_Delta_Backgrd_above_Tolerance_L_vs_K) +#pgg +plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.html",sep="") +saveWidget(pgg, file=plotly_path, selfcontained =TRUE) + +#frequency plot for all data vs. the delta_background +DeltaBackground_Frequency_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_density() + + ggtitle("Density plot for Delta Background by Conc All Data") + theme_Publication_legend_right() + +#bar plot for all data vs. the delta_background +DeltaBackground_Bar_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_bar() + + ggtitle("Bar plot for Delta Background by Conc All Data") + theme_Publication_legend_right() + +pdf(file = paste(outputpath_QC,"Frequency_Delta_Background.pdf",sep=""),width = 12,height = 8) +print(DeltaBackground_Frequency_Plot) +print(DeltaBackground_Bar_Plot) +dev.off() + + +#Need to identify missing data, and differentiate between this data and removed data so the removed data can get set to NA and the missing data can get set to max theoretical values +#1 for missing data, 0 for non missing data +#Use "NG" for NoGrowth rather than "missing" +X$NG <- 0 +try(X[X$l == 0 & !is.na(X$l),]$NG <- 1) + +#1 for removed data, 0 non removed data +#Use DB to identify number of genes removed due to the DeltaBackground Threshold rather than "Removed" +X$DB <- 0 +try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$DB <- 1) + +#replace the CPPs for l, r, AUC and K (must be last!) 
for removed data +try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$l <- NA) +try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$r <- NA) +try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$AUC <- NA) +try(X[X$Delta_Backgrd >= Delta_Background_Tolerance,]$K <- NA) + + +Plate_Analysis_L_afterQC <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() + +Plate_Analysis_K_afterQC <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() + +Plate_Analysis_r_afterQC <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() + +Plate_Analysis_AUC_afterQC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() + +Plate_Analysis_Delta_Backgrd_afterQC <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = 
mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() + + +Plate_Analysis_L_Box_afterQC <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() + +Plate_Analysis_K_Box_afterQC <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() + +Plate_Analysis_r_Box_afterQC <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() + +Plate_Analysis_AUC_Box_afterQC <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() + +Plate_Analysis_Delta_Backgrd_Box_afterQC <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() + +#print the plate analysis data before and after QC +pdf(file=paste(outputpath_QC,"Plate_Analysis.pdf",sep=""),width = 14,height=9) +Plate_Analysis_L +Plate_Analysis_L_afterQC +Plate_Analysis_K +Plate_Analysis_K_afterQC +Plate_Analysis_r +Plate_Analysis_r_afterQC +Plate_Analysis_AUC +Plate_Analysis_AUC_afterQC +Plate_Analysis_Delta_Backgrd +Plate_Analysis_Delta_Backgrd_afterQC +dev.off() + +#print the plate analysis data before and after QC +pdf(file=paste(outputpath_QC,"Plate_Analysis_Boxplots.pdf",sep=""),width = 18,height=9) +Plate_Analysis_L_Box +Plate_Analysis_L_Box_afterQC +Plate_Analysis_K_Box +Plate_Analysis_K_Box_afterQC +Plate_Analysis_r_Box 
+Plate_Analysis_r_Box_afterQC +Plate_Analysis_AUC_Box +Plate_Analysis_AUC_Box_afterQC +Plate_Analysis_Delta_Backgrd_Box +Plate_Analysis_Delta_Backgrd_Box_afterQC +dev.off() + +#remove the zero values and print plate analysis +X_noZero <- X[which(X$l > 0),] +Plate_Analysis_L_afterQC_Z <- ggplot(X_noZero,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() + +Plate_Analysis_K_afterQC_Z <- ggplot(X_noZero,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() + +Plate_Analysis_r_afterQC_Z <- ggplot(X_noZero,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() + +Plate_Analysis_AUC_afterQC_Z <- ggplot(X_noZero,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() + +Plate_Analysis_Delta_Backgrd_afterQC_Z <- ggplot(X_noZero,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + 
stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() + + +Plate_Analysis_L_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication() + +Plate_Analysis_K_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication() + +Plate_Analysis_r_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication() + +Plate_Analysis_AUC_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication() + +Plate_Analysis_Delta_Backgrd_Box_afterQC_Z <- ggplot(X_noZero,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() + + ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication() + +#print the plate analysis data before and after QC +pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros.pdf",sep=""),width = 14,height=9) +Plate_Analysis_L_afterQC_Z +Plate_Analysis_K_afterQC_Z +Plate_Analysis_r_afterQC_Z +Plate_Analysis_AUC_afterQC_Z +Plate_Analysis_Delta_Backgrd_afterQC_Z +dev.off() + +#print the plate analysis data before and after QC +pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros_Boxplots.pdf",sep=""),width = 18,height=9) +Plate_Analysis_L_Box_afterQC_Z +Plate_Analysis_K_Box_afterQC_Z +Plate_Analysis_r_Box_afterQC_Z +Plate_Analysis_AUC_Box_afterQC_Z 
+Plate_Analysis_Delta_Backgrd_Box_afterQC_Z +dev.off() + +#remove dataset with zeros removed +rm(X_noZero) + + +#X_test_missing_and_removed <- X[X$Removed == 1,] + +#calculate summary statistics for all strains, including both background and the deletions +X_stats_ALL <- ddply(X, c("Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean_L = mean(l,na.rm=TRUE), + median_L = median(l,na.rm=TRUE), + max_L = max(l,na.rm=TRUE), + min_L = min(l,na.rm=TRUE), + sd_L = sd(l,na.rm=TRUE), + se_L = sd_L / sqrt(N-1), + mean_K = mean(K,na.rm=TRUE), + median_K = median(K,na.rm=TRUE), + max_K = max(K,na.rm=TRUE), + min_K = min(K,na.rm=TRUE), + sd_K = sd(K,na.rm=TRUE), + se_K = sd_K / sqrt(N-1), + mean_r = mean(r,na.rm=TRUE), + median_r = median(r,na.rm=TRUE), + max_r = max(r,na.rm=TRUE), + min_r = min(r,na.rm=TRUE), + sd_r = sd(r,na.rm=TRUE), + se_r = sd_r / sqrt(N-1), + mean_AUC = mean(AUC,na.rm=TRUE), + median_AUC = median(AUC,na.rm=TRUE), + max_AUC = max(AUC,na.rm=TRUE), + min_AUC = min(AUC,na.rm=TRUE), + sd_AUC = sd(AUC,na.rm=TRUE), + se_AUC = sd_AUC / sqrt(N-1) +) +#print(X_stats_ALL_L) + +write.csv(X_stats_ALL,file=paste(outputpath,"SummaryStats_ALLSTRAINS.csv"),row.names = FALSE) +#+++++END QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +##### Part 3 - Generate summary statistics and calculate the max theoretical L value +##### Calculate the Z score at each drug conc for each deletion strain + + +#get the background strains - can be modified to take another argument but for most screens will just be YDL227C +Background_Strains <- c("YDL227C") + +#first part of loop will go through for each background strain +#most cases there will only be one YDL227C +for(s in Background_Strains){ + X_Background <- X[X$OrfRep == s,] + + #if there's missing data for the background strains set these values to NA so the 0 values aren't included in summary statistics + #we may want to consider in some cases giving the max high value to L depending on the data 
type + if(table(X_Background$l)[1] == 0){ + X_Background[X_Background$l == 0,]$l <- NA + X_Background[X_Background$K == 0,]$K <- NA + X_Background[X_Background$r == 0,]$r <- NA + X_Background[X_Background$AUC == 0,]$AUC <- NA + } + + X_Background <- X_Background[!is.na(X_Background$l),] + + #get summary stats for L, K, R, AUC + X_stats_BY_L <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean = mean(l,na.rm=TRUE), + median = median(l,na.rm=TRUE), + max = max(l,na.rm=TRUE), + min = min(l,na.rm=TRUE), + sd = sd(l,na.rm=TRUE), + se = sd / sqrt(N-1) + ) + print(X_stats_BY_L) + X1_SD <- max(X_stats_BY_L$sd) + + X_stats_BY_K <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, + N = (length(K)), + mean = mean(K,na.rm=TRUE), + median = median(K,na.rm=TRUE), + max = max(K,na.rm=TRUE), + min = min(K,na.rm=TRUE), + sd = sd(K,na.rm=TRUE), + se = sd / sqrt(N-1) + ) + + X1_SD_K <- max(X_stats_BY_K$sd) + + + X_stats_BY_r <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, + N = length(r), + mean = mean(r,na.rm=TRUE), + median = median(r,na.rm=TRUE), + max = max(r,na.rm=TRUE), + min = min(r,na.rm=TRUE), + sd = sd(r,na.rm=TRUE), + se = sd / sqrt(N-1) + ) + + X1_SD_r <- max(X_stats_BY_r$sd) + + X_stats_BY_AUC <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, + N = length(AUC), + mean = mean(AUC,na.rm=TRUE), + median = median(AUC,na.rm=TRUE), + max = max(AUC,na.rm=TRUE), + min = min(AUC,na.rm=TRUE), + sd = sd(AUC,na.rm=TRUE), + se = sd / sqrt(N-1) + ) + + X1_SD_AUC <- max(X_stats_BY_AUC$sd) + + X_stats_BY <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean_L = mean(l,na.rm=TRUE), + median_L = median(l,na.rm=TRUE), + max_L = max(l,na.rm=TRUE), + min_L = min(l,na.rm=TRUE), + sd_L = sd(l,na.rm=TRUE), + se_L = sd_L / sqrt(N-1), + mean_K = mean(K,na.rm=TRUE), + median_K = median(K,na.rm=TRUE), + max_K = 
max(K,na.rm=TRUE), + min_K = min(K,na.rm=TRUE), + sd_K = sd(K,na.rm=TRUE), + se_K = sd_K / sqrt(N-1), + mean_r = mean(r,na.rm=TRUE), + median_r = median(r,na.rm=TRUE), + max_r = max(r,na.rm=TRUE), + min_r = min(r,na.rm=TRUE), + sd_r = sd(r,na.rm=TRUE), + se_r = sd_r / sqrt(N-1), + mean_AUC = mean(AUC,na.rm=TRUE), + median_AUC = median(AUC,na.rm=TRUE), + max_AUC = max(AUC,na.rm=TRUE), + min_AUC = min(AUC,na.rm=TRUE), #AUC minimum; was mislabeled min_L which duplicated the L column name and left no min_AUC column + sd_AUC = sd(AUC,na.rm=TRUE), + se_AUC = sd_AUC / sqrt(N-1) + ) + + write.csv(X_stats_BY,file=paste(outputpath,"SummaryStats_BackgroundStrains.csv"),row.names=FALSE) + + #calculate the max theoretical L values + #only look for max values when K is within 2SD of the ref strain + for(q in unique(X$Conc_Num_Factor)){ + if(q == 0){ + X_within_2SD_K <- X[X$Conc_Num_Factor == q,] + X_within_2SD_K <- X_within_2SD_K[!is.na(X_within_2SD_K$l),] + X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] + X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) ,] + X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] + + X_outside_2SD_K <- X[X$Conc_Num_Factor == q,] + X_outside_2SD_K <- X_outside_2SD_K[!is.na(X_outside_2SD_K$l),] + #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] + X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] + #X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] + + } + if(q > 0){ + X_within_2SD_K_temp <- X[X$Conc_Num_Factor == q,] + X_within_2SD_K_temp <- X_within_2SD_K_temp[!is.na(X_within_2SD_K_temp$l),] + X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] + X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])),] +
X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])),] + X_within_2SD_K <- rbind(X_within_2SD_K,X_within_2SD_K_temp) + + X_outside_2SD_K_temp <- X[X$Conc_Num_Factor == q,] + X_outside_2SD_K_temp <- X_outside_2SD_K_temp[!is.na(X_outside_2SD_K_temp$l),] + #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,] + X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] + #X_outside_2SD_K_temp <- X_outside_2SD_K_temp[X_outside_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,] + X_outside_2SD_K <- rbind(X_outside_2SD_K,X_outside_2SD_K_temp) + } + } + + X_stats_BY_L_within_2SD_K <- ddply(X_within_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean = mean(l), + median = median(l), + max = max(l,na.rm=TRUE), + min = min(l,na.rm=TRUE), + sd = sd(l), + se = sd / sqrt(N-1), + z_max = (max-mean)/sd + ) + print(X_stats_BY_L_within_2SD_K) + X1_SD_within_2SD_K <- max(X_stats_BY_L_within_2SD_K$sd) + write.csv(X_stats_BY_L_within_2SD_K,file=paste(outputpath_QC,"Max_Observed_L_Vals_for_spots_within_2SD_K.csv",sep=""),row.names=FALSE) + + X_stats_BY_L_outside_2SD_K <- ddply(X_outside_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean = mean(l), + median = median(l), + max = max(l,na.rm=TRUE), + min = min(l,na.rm=TRUE), + sd = sd(l), + se = sd / sqrt(N-1) + ) + print(X_stats_BY_L_outside_2SD_K) + X1_SD_outside_2SD_K <- max(X_stats_BY_L_outside_2SD_K$sd) + + #X1_SD_outside_2SD_K <- X[X$l %in% X1_SD_within_2SD_K$l,] + Outside_2SD_K_L_vs_K <- ggplot(X_outside_2SD_K,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) + + ggtitle("Raw L vs K for strains falling outside 2SD of the K mean at each conc") + 
theme_Publication_legend_right() + pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8) + print(Outside_2SD_K_L_vs_K) + dev.off() + pgg <- ggplotly(Outside_2SD_K_L_vs_K) + plotly_path <- paste(getwd(),"/",outputpath_QC,"RawL_vs_K_for_strains_outside_2SD_K.html",sep="") + saveWidget(pgg, file=plotly_path, selfcontained =TRUE) + + + Outside_2SD_K_delta_background_vs_K <- ggplot(X_outside_2SD_K,aes(Delta_Backgrd,K,color=as.factor(Conc_Num))) + geom_point(aes(l=l,ORF=ORF,Gene=Gene),shape=3,position="jitter") + + ggtitle("DeltaBackground vs K for strains falling outside 2SD of the K mean at each conc") + theme_Publication_legend_right() + pdf(paste(outputpath_QC,"DeltaBackground_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8) + Outside_2SD_K_delta_background_vs_K + dev.off() + pgg <- ggplotly(Outside_2SD_K_delta_background_vs_K) + #pgg + plotly_path <- paste(getwd(),"/",outputpath_QC,"DeltaBackground_vs_K_for_strains_outside_2SD_K.html",sep="") + saveWidget(pgg, file=plotly_path, selfcontained =TRUE) + + + #get the background strain mean values at the no drug conc to calculate shift + Background_L <- X_stats_BY_L$mean[1] + Background_K <- X_stats_BY_K$mean[1] + Background_r <- X_stats_BY_r$mean[1] + Background_AUC <- X_stats_BY_AUC$mean[1] + + #create empty plots for plotting element + p_l <- ggplot() + p_K <- ggplot() + p_r <- ggplot() + p_AUC <- ggplot() + + p_rf_l <- ggplot() + p_rf_K <- ggplot() + p_rf_r <- ggplot() + p_rf_AUC <- ggplot() + + #get only the deletion strains + X2 <- X + X2 <- X2[X2$OrfRep != "YDL227C",] + + #if set to max theoretical value, add a 1 to SM, if not, leave as 0 + #SM = Set to Max + X2$SM <- 0 + #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min + for(i in 1:length(unique(X2$Conc_Num))){ + Concentration <- unique(X2$Conc_Num)[i] + X2_temp <- X2[X2$Conc_Num == Concentration,] + if(Concentration == 
0){ + X2_new <- X2_temp + print(paste("Check loop order, conc =",Concentration,sep=" ")) + } + if(Concentration > 0){ + try(X2_temp[X2_temp$l == 0 & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) + try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$SM <- 1) + try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) + + #X2_temp[X2_temp$K == 0,]$K <- X_stats_ALL_K$max[i] + #X2_temp[X2_temp$r == 0,]$r <- X_stats_ALL_r$max[i] + #X2_temp[X2_temp$AUC == 0,]$AUC <- X_stats_ALL_AUC$max[i] + print(paste("Check loop order, conc =",Concentration,sep=" ")) + + X2_new <- rbind(X2_new,X2_temp) + + } + } + X2 <- X2_new + + + #get only the RF strains + X2_RF <- X + X2_RF <- X2_RF[X2_RF$OrfRep == "YDL227C",] + #if set to max theoretical value, add a 1 to SM, if not, leave as 0 + #SM = Set to Max + X2_RF$SM <- 0 + #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min + for(i in 1:length(unique(X2_RF$Conc_Num))){ + Concentration <- unique(X2_RF$Conc_Num)[i] + X2_RF_temp <- X2_RF[X2_RF$Conc_Num == Concentration,] + if(Concentration == 0){ + X2_RF_new <- X2_RF_temp + print(paste("Check loop order, conc =",Concentration,sep=" ")) + } + if(Concentration > 0){ + try(X2_RF_temp[X2_RF_temp$l == 0 & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) + try(X2_RF_temp[X2_RF_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_RF_temp$l),]$SM <- 1) #flag the RF rows; previously wrote to X2_temp, a stale frame from the deletion-strain loop, so RF SM was never set + try(X2_RF_temp[X2_RF_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i]) + print(paste("Check loop order, if error, refs have no L values outside theoretical max L, for REFs, conc =",Concentration,sep=" ")) + + X2_RF_new <- rbind(X2_RF_new,X2_RF_temp) + + } + } + X2_RF <- X2_RF_new + + + #######Part 4 Get the RF Z score values + #Change the OrfRep Column to include the RF strain, the Gene name and the Num. 
so each RF gets its own score + X2_RF$OrfRep <- paste(X2_RF$OrfRep,X2_RF$Gene,X2_RF$Num.,sep="_") + + num_genes_RF <- length(unique(X2_RF$OrfRep)) + print(num_genes_RF) + + + #create the output data.frame containing columns for each RF strain + InteractionScores_RF <- unique(X2_RF["OrfRep"]) + #InteractionScores_RF$Gene <- unique(X2$Gene) + InteractionScores_RF$Gene <- NA + InteractionScores_RF$Raw_Shift_L <- NA + InteractionScores_RF$Z_Shift_L <- NA + InteractionScores_RF$lm_Score_L <- NA + InteractionScores_RF$Z_lm_L <- NA + InteractionScores_RF$R_Squared_L <- NA + InteractionScores_RF$Sum_Z_Score_L <- NA + InteractionScores_RF$Avg_Zscore_L <- NA + InteractionScores_RF$Raw_Shift_K <- NA + InteractionScores_RF$Z_Shift_K <- NA + InteractionScores_RF$lm_Score_K <- NA + InteractionScores_RF$Z_lm_K <- NA + InteractionScores_RF$R_Squared_K <- NA + InteractionScores_RF$Sum_Z_Score_K <- NA + InteractionScores_RF$Avg_Zscore_K <- NA + InteractionScores_RF$Raw_Shift_r <- NA + InteractionScores_RF$Z_Shift_r <- NA + InteractionScores_RF$lm_Score_r <- NA + InteractionScores_RF$Z_lm_r <- NA + InteractionScores_RF$R_Squared_r <- NA + InteractionScores_RF$Sum_Z_Score_r <- NA + InteractionScores_RF$Avg_Zscore_r <- NA + InteractionScores_RF$Raw_Shift_AUC <- NA + InteractionScores_RF$Z_Shift_AUC <- NA + InteractionScores_RF$lm_Score_AUC <- NA + InteractionScores_RF$Z_lm_AUC <- NA + InteractionScores_RF$R_Squared_AUC <- NA + InteractionScores_RF$Sum_Z_Score_AUC <- NA + InteractionScores_RF$Avg_Zscore_AUC <- NA + InteractionScores_RF$NG <- NA + InteractionScores_RF$SM <- NA + + + for(i in 1:num_genes_RF){ + #get each deletion strain ORF + Gene_Sel <- unique(X2_RF$OrfRep)[i] + #extract only the current deletion strain and its data + X_Gene_Sel <- X2_RF[X2_RF$OrfRep == Gene_Sel,] + + X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean_L = mean(l,na.rm = TRUE), + median_L = median(l,na.rm = TRUE), + sd_L = 
sd(l,na.rm = TRUE), + se_L = sd_L / sqrt(N-1), + mean_K = mean(K,na.rm = TRUE), + median_K = median(K,na.rm = TRUE), + sd_K = sd(K,na.rm = TRUE), + se_K = sd_K / sqrt(N-1), + mean_r = mean(r,na.rm = TRUE), + median_r = median(r,na.rm = TRUE), + sd_r = sd(r,na.rm = TRUE), + se_r = sd_r / sqrt(N-1), + mean_AUC = mean(AUC,na.rm = TRUE), + median_AUC = median(AUC,na.rm = TRUE), + sd_AUC = sd(AUC,na.rm = TRUE), + se_AUC = sd_AUC / sqrt(N-1), + NG = sum(NG,na.rm=TRUE), + DB= sum(DB,na.rm=TRUE), + SM = sum(SM,na.rm=TRUE) + ) + + + #Get shift vals + #if L is 0, that means the no growth on no drug + #if L is NA at 0, that means the spot was removed due to contamination + #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift + #otherwise calculate shift at no drug conc + if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){ + X_stats_interaction$Raw_Shift_L <- 0 + X_stats_interaction$Raw_Shift_K <- 0 + X_stats_interaction$Raw_Shift_r <- 0 + X_stats_interaction$Raw_Shift_AUC <- 0 + X_stats_interaction$Z_Shift_L <- 0 + X_stats_interaction$Z_Shift_K <- 0 + X_stats_interaction$Z_Shift_r <- 0 + X_stats_interaction$Z_Shift_AUC <- 0 + }else{ + X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L + X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K + X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r + X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC + X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1] + X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1] + X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1] + X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1] + } + + + #get WT vals + X_stats_interaction$WT_l <- X_stats_BY_L$mean + X_stats_interaction$WT_K 
<- X_stats_BY_K$mean + X_stats_interaction$WT_r <- X_stats_BY_r$mean + X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean + + #Get WT SD + X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd + X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd + X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd + X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd + + + #only get scores if there's growth at no drug + if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){ + + #calculate expected values + X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L + X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K + X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r + X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC + + #calculate normalized delta values + X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L + X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K + X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r + X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC + + #disregard shift for no growth values in Z score calculation + if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- 
X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC + + } + #disregard shift for set to max values in Z score calculation + if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l + #only calculate the L value without shift since L is the only adjusted value + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC + + } + + + + #calculate Z score at each concentration + X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) + X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) + X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) + X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) + + #get linear model + gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction) + gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction) + gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction) + gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction) + + #get interaction score calculated by linear model and R-squared value for the fit + gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] 
+     r_squared_l <- summary(gene_lm_L)$r.squared
+     #Each interaction score is the fitted Delta at MAX_CONC: intercept (coefficients[1]) + slope (coefficients[2]) * MAX_CONC
+     gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1]
+     r_squared_K <- summary(gene_lm_K)$r.squared
+     gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1]
+     r_squared_r <- summary(gene_lm_r)$r.squared
+     #FIX: the slope must come from the AUC model; it was previously taken from gene_lm_r
+     gene_interaction_AUC <- MAX_CONC*(gene_lm_AUC$coefficients[2]) + gene_lm_AUC$coefficients[1]
+     r_squared_AUC <- summary(gene_lm_AUC)$r.squared
+
+     #Get total of non removed values
+     Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1
+
+     #report the scores
+     InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1]
+     InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1]
+
+     InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1]
+     InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1]
+     InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L
+     InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l
+     InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)
+     InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc)
+
+
+     InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1]
+     InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1]
+     InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K
+     InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K
+     InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <-
sum(X_stats_interaction$Zscore_K,na.rm = TRUE) + InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc) + + InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] + InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] + InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r + InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r + InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE) + InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1) + + InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] + InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] + InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC + InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC + InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE) + InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1) + } + + if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){ + + #calculate expected values + X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L + X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K + X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r + 
X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC + + #calculate normalized delta values + X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L + X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K + X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r + X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC + + #disregard shift for missing values in Z score calculatiom + if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){ + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC + } + #disregard shift for set to max values in Z score calculation + if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l + #only calculate the L value without shift since L is the only adjusted value + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - 
X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC + + } + + + #calculate Z score at each concentration + X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) + X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) + X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) + X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) + + #NA values for the next part since there's an NA or 0 at the no drug. + gene_lm_L <- NA + gene_lm_K <- NA + gene_lm_r <- NA + gene_lm_AUC <- NA + + gene_interaction_L <- NA + r_squared_l <- NA + gene_interaction_K <- NA + r_squared_K <- NA + gene_interaction_r <- NA + r_squared_r <- NA + gene_interaction_AUC <- NA + r_squared_AUC <- NA + + X_stats_interaction$Raw_Shift_L <- NA + X_stats_interaction$Raw_Shift_K <- NA + X_stats_interaction$Raw_Shift_r <- NA + X_stats_interaction$Raw_Shift_AUC <- NA + + X_stats_interaction$Z_Shift_L <- NA + X_stats_interaction$Z_Shift_K <- NA + X_stats_interaction$Z_Shift_r <- NA + X_stats_interaction$Z_Shift_AUC <- NA + + InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] + InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] + + InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] + InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] + InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L + InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l + 
InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + + InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] + InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] + InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K + InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K + InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + + InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] + InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] + InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r + InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r + InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + + InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] + InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] + InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC + InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC + InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + } + + if(i == 1){ + X_stats_interaction_ALL_RF <- X_stats_interaction 
+ } + if(i > 1){ + X_stats_interaction_ALL_RF <- rbind(X_stats_interaction_ALL_RF,X_stats_interaction) + } + + InteractionScores_RF$NG[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE) + InteractionScores_RF$DB[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE) + InteractionScores_RF$SM[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE) + + #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int) + + + } + print("Pass RF Calculation loop") + + lm_sd_L <- sd(InteractionScores_RF$lm_Score_L,na.rm=TRUE) + lm_sd_K <- sd(InteractionScores_RF$lm_Score_K,na.rm=TRUE) + lm_sd_r <- sd(InteractionScores_RF$lm_Score_r,na.rm=TRUE) + lm_sd_AUC <- sd(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) + + lm_mean_L <- mean(InteractionScores_RF$lm_Score_L,na.rm=TRUE) + lm_mean_K <- mean(InteractionScores_RF$lm_Score_K,na.rm=TRUE) + lm_mean_r <- mean(InteractionScores_RF$lm_Score_r,na.rm=TRUE) + lm_mean_AUC <- mean(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) + + print(paste("Mean RF linear regression score L",lm_mean_L)) + + + InteractionScores_RF$Z_lm_L <- (InteractionScores_RF$lm_Score_L - lm_mean_L)/(lm_sd_L) + InteractionScores_RF$Z_lm_K <- (InteractionScores_RF$lm_Score_K - lm_mean_K)/(lm_sd_K) + InteractionScores_RF$Z_lm_r <- (InteractionScores_RF$lm_Score_r - lm_mean_r)/(lm_sd_r) + InteractionScores_RF$Z_lm_AUC <- (InteractionScores_RF$lm_Score_AUC - lm_mean_AUC)/(lm_sd_AUC) + + + InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$Z_lm_L,decreasing=TRUE),] + InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$NG,decreasing=TRUE),] + write.csv(InteractionScores_RF,paste(outputpath,"RF_ZScores_Interaction.csv",sep=""),row.names=FALSE) + + + for(i in 1:num_genes_RF){ + Gene_Sel <- unique(InteractionScores_RF$OrfRep)[i] + X_ZCalculations <- X_stats_interaction_ALL_RF[X_stats_interaction_ALL_RF$OrfRep == Gene_Sel,] + X_Int_Scores <- 
InteractionScores_RF[InteractionScores_RF$OrfRep == Gene_Sel,]
+
+   #Delta L vs. concentration for this reference strain: points, linear fit, +/- 2 WT SD error bars around 0,
+   #and text annotations of the summary scores taken from X_Int_Scores
+   p_rf_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) +
+     coord_cartesian(ylim = c(-65,65)) +
+     geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) +
+     ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) +
+     annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) +
+     #annotate("text",x=1,y=35,label = paste("Avg Zscore =",round(X_Int_Scores$Avg_Zscore_L,2))) +
+     annotate("text",x=1,y=25,label = paste("lm Zscore =",round(X_Int_Scores$Z_lm_L,2))) +
+     annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) +
+     annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) +
+     annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) +
+     scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) +
+     scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) +
+     theme_Publication()
+
+   #Same layout for Delta K.
+   #FIX: the "lm ZScore" label now reports Z_lm_K; it previously printed the L score (Z_lm_L) on the K plot.
+   p_rf_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) +
+     coord_cartesian(ylim = c(-65,65)) +
+     geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) +
+     ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) +
+     annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) +
+     #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) +
+     annotate("text",x=1,y=25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_K,2))) +
+     annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) +
+     annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) +
+     annotate("text",x=1,y=-45,label = paste("SM
=",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + + theme_Publication() + + p_rf_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-0.65,0.65)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) + + annotate("text",x=1,y=0.25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_r,2))) + + annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) + + theme_Publication() + + p_rf_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-6500,6500)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) + + annotate("text",x=1,y=2500,label = paste("lm 
ZScore =",round(X_Int_Scores$Z_lm_AUC,2))) + + annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) + + theme_Publication() + + + + if(i == 1){ + X_stats_interaction_ALL_RF_final <- X_ZCalculations + } + if(i > 1){ + X_stats_interaction_ALL_RF_final <- rbind(X_stats_interaction_ALL_RF_final,X_ZCalculations) + } + } + print("Pass RF ggplot loop") + write.csv(X_stats_interaction_ALL_RF_final,paste(outputpath,"RF_ZScore_Calculations.csv",sep=""),row.names = FALSE) + + + ####### Part 5 - Get Zscores for Gene deletion strains + + + + #get total number of genes for the next loop + num_genes <- length(unique(X2$OrfRep)) + print(num_genes) + + #create the output data.frame containing columns for each deletion strain + InteractionScores <- unique(X2["OrfRep"]) + #InteractionScores$Gene <- unique(X2$Gene) + InteractionScores$Gene <- NA + InteractionScores$Raw_Shift_L <- NA + InteractionScores$Z_Shift_L <- NA + InteractionScores$lm_Score_L <- NA + InteractionScores$Z_lm_L <- NA + InteractionScores$R_Squared_L <- NA + InteractionScores$Sum_Z_Score_L <- NA + InteractionScores$Avg_Zscore_L <- NA + InteractionScores$Raw_Shift_K <- NA + InteractionScores$Z_Shift_K <- NA + InteractionScores$lm_Score_K <- NA + InteractionScores$Z_lm_K <- NA + InteractionScores$R_Squared_K <- NA + InteractionScores$Sum_Z_Score_K <- NA + InteractionScores$Avg_Zscore_K <- NA + InteractionScores$Raw_Shift_r <- NA + InteractionScores$Z_Shift_r <- NA + InteractionScores$lm_Score_r <- NA + InteractionScores$Z_lm_r <- NA + InteractionScores$R_Squared_r <- NA + InteractionScores$Sum_Z_Score_r <- NA + 
InteractionScores$Avg_Zscore_r <- NA + InteractionScores$Raw_Shift_AUC <- NA + InteractionScores$Z_Shift_AUC <- NA + InteractionScores$lm_Score_AUC <- NA + InteractionScores$Z_lm_AUC <- NA + InteractionScores$R_Squared_AUC <- NA + InteractionScores$Sum_Z_Score_AUC <- NA + InteractionScores$Avg_Zscore_AUC <- NA + InteractionScores$NG <- NA + InteractionScores$DB <- NA + InteractionScores$SM <- NA + + for(i in 1:num_genes){ + #get each deletion strain ORF + Gene_Sel <- unique(X2$OrfRep)[i] + #extract only the current deletion strain and its data + X_Gene_Sel <- X2[X2$OrfRep == Gene_Sel,] + + X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean_L = mean(l,na.rm = TRUE), + median_L = median(l,na.rm = TRUE), + sd_L = sd(l,na.rm = TRUE), + se_L = sd_L / sqrt(N-1), + mean_K = mean(K,na.rm = TRUE), + median_K = median(K,na.rm = TRUE), + sd_K = sd(K,na.rm = TRUE), + se_K = sd_K / sqrt(N-1), + mean_r = mean(r,na.rm = TRUE), + median_r = median(r,na.rm = TRUE), + sd_r = sd(r,na.rm = TRUE), + se_r = sd_r / sqrt(N-1), + mean_AUC = mean(AUC,na.rm = TRUE), + median_AUC = median(AUC,na.rm = TRUE), + sd_AUC = sd(AUC,na.rm = TRUE), + se_AUC = sd_AUC / sqrt(N-1), + NG = sum(NG,na.rm=TRUE), + DB= sum(DB,na.rm=TRUE), + SM = sum(SM,na.rm=TRUE) + ) + + + #Get shift vals + #if L is 0, that means the no growth on no drug + #if L is NA at 0, that means the spot was removed due to contamination + #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift + #otherwise calculate shift at no drug conc + if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){ + X_stats_interaction$Raw_Shift_L <- 0 + X_stats_interaction$Raw_Shift_K <- 0 + X_stats_interaction$Raw_Shift_r <- 0 + X_stats_interaction$Raw_Shift_AUC <- 0 + X_stats_interaction$Z_Shift_L <- 0 + X_stats_interaction$Z_Shift_K <- 0 + X_stats_interaction$Z_Shift_r <- 0 + X_stats_interaction$Z_Shift_AUC <- 0 + 
}else{ + X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L + X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K + X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r + X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC + X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1] + X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1] + X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1] + X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1] + } + + + #get WT vals + X_stats_interaction$WT_l <- X_stats_BY_L$mean + X_stats_interaction$WT_K <- X_stats_BY_K$mean + X_stats_interaction$WT_r <- X_stats_BY_r$mean + X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean + + #Get WT SD + X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd + X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd + X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd + X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd + + + #only get scores if there's growth at no drug + if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){ + + #calculate expected values + X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L + X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K + X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r + X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC + + #calculate normalized delta values + X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L + X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K + X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r + 
X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC + + #disregard shift for no growth values in Z score calculation + if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC + + } + #disregard shift for set to max values in Z score calculation + if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l + #only calculate the L value without shift since L is the only adjusted value + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC + + } + + + + #calculate Z score at each concentration + X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) + 
X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K)
+     X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r)
+     X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC)
+
+     #get linear model
+     #one fit per measure: Delta ~ Conc_Num_Factor over this strain's per-concentration summary rows
+     gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction)
+     gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction)
+     gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction)
+     gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction)
+
+     #get interaction score calculated by linear model and R-squared value for the fit
+     #score = fitted Delta at MAX_CONC: intercept (coefficients[1]) + slope (coefficients[2]) * MAX_CONC
+     gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1]
+     r_squared_l <- summary(gene_lm_L)$r.squared
+     gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1]
+     r_squared_K <- summary(gene_lm_K)$r.squared
+     gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1]
+     r_squared_r <- summary(gene_lm_r)$r.squared
+     #FIX: the slope must come from the AUC model; it was previously taken from gene_lm_r
+     gene_interaction_AUC <- MAX_CONC*(gene_lm_AUC$coefficients[2]) + gene_lm_AUC$coefficients[1]
+     r_squared_AUC <- summary(gene_lm_AUC)$r.squared
+
+     #Get total of non removed values
+     Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1
+
+     #report the scores
+     InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1])
+     InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1])
+
+     InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1]
+     InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1]
+     InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_L
+     InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <-
(gene_interaction_L-lm_mean_L)/lm_sd_L + InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l + InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE) + InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc) + + + InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] + InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] + InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_K + InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_K-lm_mean_K)/lm_sd_K + InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K + InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE) + InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc) + + InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] + InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] + InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_r + InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_r-lm_mean_r)/lm_sd_r + InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r + InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE) + InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1) + + InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- 
X_stats_interaction$Raw_Shift_AUC[1] + InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] + InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_AUC + InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_AUC-lm_mean_AUC)/lm_sd_AUC + InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC + InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE) + InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1) + } + + if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){ + + #calculate expected values + X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L + X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K + X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r + X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC + + #calculate normalized delta values + X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L + X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K + X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r + X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC + + #disregard shift for missing values in Z score calculatiom + if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){ + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 
1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC + } + #disregard shift for set to max values in Z score calculation + if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l + #only calculate the L value without shift since L is the only adjusted value + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC + + } + + + #calculate Z score at each concentration + X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) + X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) + X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) + X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) + + #NA values for the next part since there's an NA or 0 at the no drug. 
+ gene_lm_L <- NA + gene_lm_K <- NA + gene_lm_r <- NA + + gene_interaction_L <- NA + r_squared_l <- NA + gene_interaction_K <- NA + r_squared_K <- NA + gene_interaction_r <- NA + r_squared_r <- NA + + X_stats_interaction$Raw_Shift_L <- NA + X_stats_interaction$Raw_Shift_K <- NA + X_stats_interaction$Raw_Shift_r <- NA + X_stats_interaction$Raw_Shift_AUC <- NA + + X_stats_interaction$Z_Shift_L <- NA + X_stats_interaction$Z_Shift_K <- NA + X_stats_interaction$Z_Shift_r <- NA + X_stats_interaction$Z_Shift_AUC <- NA + + InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1]) + InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1]) + + InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] + InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] + InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l + InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- NA + + InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] + InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] + InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <-NA + InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K + InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- NA + + InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- 
X_stats_interaction$Raw_Shift_r[1] + InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] + InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r + InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- NA + + InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] + InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] + InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC + InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA + } + + if(i == 1){ + X_stats_interaction_ALL <- X_stats_interaction + } + if(i > 1){ + X_stats_interaction_ALL <- rbind(X_stats_interaction_ALL,X_stats_interaction) + } + + InteractionScores$NG[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE) + InteractionScores$DB[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE) + InteractionScores$SM[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE) + + #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int) + + + } + print("Pass Int Calculation loop") + InteractionScores <- InteractionScores[order(InteractionScores$Z_lm_L,decreasing=TRUE),] + InteractionScores <- InteractionScores[order(InteractionScores$NG,decreasing=TRUE),] + df_order_by_OrfRep <- unique(InteractionScores$OrfRep) + 
#X_stats_interaction_ALL <- X_stats_interaction_ALL[order(X_stats_interaction_ALL$NG,decreasing=TRUE),]
# Export the complete interaction-score table.
write.csv(InteractionScores, paste0(outputpath, "ZScores_Interaction.csv"), row.names = FALSE)

# Select rows of InteractionScores by a logical flag, then drop the all-NA
# rows that an NA flag produces (recognised by a missing OrfRep).
keep_hits <- function(flag) {
  hits <- InteractionScores[flag, ]
  hits[!is.na(hits$OrfRep), ]
}

# Hit tables based on the average Z score, cut at +/-2. L and K use opposite
# signs for "enhancer" and "suppressor".
InteractionScores_deletion_enhancers_L <- keep_hits(with(InteractionScores, Avg_Zscore_L >= 2))
InteractionScores_deletion_enhancers_K <- keep_hits(with(InteractionScores, Avg_Zscore_K <= -2))
InteractionScores_deletion_suppressors_L <- keep_hits(with(InteractionScores, Avg_Zscore_L <= -2))
InteractionScores_deletion_suppressors_K <- keep_hits(with(InteractionScores, Avg_Zscore_K >= 2))
InteractionScores_deletion_enhancers_and_Suppressors_L <-
  keep_hits(with(InteractionScores, Avg_Zscore_L >= 2 | Avg_Zscore_L <= -2))
InteractionScores_deletion_enhancers_and_Suppressors_K <-
  keep_hits(with(InteractionScores, Avg_Zscore_K >= 2 | Avg_Zscore_K <= -2))

# Genes where the lm-based Z score and the average Z score point in opposite directions.
InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <-
  keep_hits(with(InteractionScores, Z_lm_L >= 2 & Avg_Zscore_L <= -2))
InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <-
  keep_hits(with(InteractionScores, Z_lm_L <= -2 & Avg_Zscore_L >= 2))
InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <-
  keep_hits(with(InteractionScores, Z_lm_K <= -2 & Avg_Zscore_K >= 2))
InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <-
  keep_hits(with(InteractionScores, Z_lm_K >= 2 & Avg_Zscore_K <= -2))

# First four exports; the remaining tables are written by the statements that follow.
write.csv(InteractionScores_deletion_enhancers_L,
          paste0(outputpath, "ZScores_Interaction_DeletionEnhancers_L.csv"), row.names = FALSE)
write.csv(InteractionScores_deletion_enhancers_K,
          paste0(outputpath, "ZScores_Interaction_DeletionEnhancers_K.csv"), row.names = FALSE)
write.csv(InteractionScores_deletion_suppressors_L,
          paste0(outputpath, "ZScores_Interaction_DeletionSuppressors_L.csv"), row.names = FALSE)
write.csv(InteractionScores_deletion_suppressors_K,
          paste0(outputpath, "ZScores_Interaction_DeletionSuppressors_K.csv"), row.names = FALSE)
# Remaining average-Z-score exports (the tables themselves are built above this span).
write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv",sep=""),row.names=FALSE)
write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv",sep=""),row.names=FALSE)
write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L,paste(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv",sep=""),row.names=FALSE)
write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L,paste(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv",sep=""),row.names=FALSE)
write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K,paste(outputpath,"ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv",sep=""),row.names=FALSE)
write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K,paste(outputpath,"ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv",sep=""),row.names=FALSE)

#get enhancers and suppressors for linear regression
# Select rows of InteractionScores by a logical flag, then drop the all-NA
# rows that an NA flag produces (recognised by a missing OrfRep).
keep_hits <- function(flag) {
  hits <- InteractionScores[flag, ]
  hits[!is.na(hits$OrfRep), ]
}
InteractionScores_deletion_enhancers_L_lm <- keep_hits(with(InteractionScores, Z_lm_L >= 2))
InteractionScores_deletion_enhancers_K_lm <- keep_hits(with(InteractionScores, Z_lm_K <= -2))
InteractionScores_deletion_suppressors_L_lm <- keep_hits(with(InteractionScores, Z_lm_L <= -2))
InteractionScores_deletion_suppressors_K_lm <- keep_hits(with(InteractionScores, Z_lm_K >= 2))
InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- keep_hits(with(InteractionScores, Z_lm_L >= 2 | Z_lm_L <= -2))
InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- keep_hits(with(InteractionScores, Z_lm_K >= 2 | Z_lm_K <= -2))

write.csv(InteractionScores_deletion_enhancers_L_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_L_lm.csv",sep=""),row.names=FALSE)
write.csv(InteractionScores_deletion_enhancers_K_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_K_lm.csv",sep=""),row.names=FALSE)
write.csv(InteractionScores_deletion_suppressors_L_lm,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_L_lm.csv",sep=""),row.names=FALSE)
write.csv(InteractionScores_deletion_suppressors_K_lm,paste(outputpath,"ZScores_Interaction_DeletionSuppressors_K_lm.csv",sep=""),row.names=FALSE)
write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv",sep=""),row.names=FALSE)
write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K_lm,paste(outputpath,"ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv",sep=""),row.names=FALSE)


write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE)
print('ln 1570 write StudyInfo.csv after ')
#write.table(Labels,file=paste("../Code/StudyInfo.txt"),sep="\t",row.names = FALSE)

# Build one per-gene "delta vs concentration" panel. The layer order matches
# the four hand-written plots this helper replaces.
#   base      ggplot(calc, aes(Conc_Num_Factor, Delta_*))
#   sd_bars   geom_errorbar layer drawn at 0 +/- 2*WT_sd_*
#   calc      this gene's rows of X_stats_interaction_ALL
#   scores    this gene's row(s) of InteractionScores
#   z_shift, z_lm   values printed in the "ZShift" / "Z lm Score" labels
#   y_lim     coord_cartesian limits
#   text_y    y positions of the ZShift, Z lm, NG, DB, SM labels (in that order)
#   y_breaks  y-axis breaks
# The "Avg ZScore" annotation remains disabled, as in the original plots.
delta_panel <- function(base, sd_bars, calc, scores, z_shift, z_lm, y_lim, text_y, y_breaks) {
  base + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) +
    coord_cartesian(ylim = y_lim) +
    sd_bars +
    ggtitle(paste(calc$OrfRep[1],calc$Gene[1],sep=" ")) +
    annotate("text",x=1,y=text_y[1],label = paste("ZShift =",round(z_shift,2))) + scale_color_discrete(guide = FALSE) +
    annotate("text",x=1,y=text_y[2],label = paste("Z lm Score =",round(z_lm,2))) +
    annotate("text",x=1,y=text_y[3],label = paste("NG =",scores$NG)) +
    annotate("text",x=1,y=text_y[4],label = paste("DB =",scores$DB)) +
    annotate("text",x=1,y=text_y[5],label = paste("SM =",scores$SM)) +
    scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(calc$Conc_Num_Factor),labels = unique(as.character(calc$Conc_Num))) +
    scale_y_continuous(breaks = y_breaks) +
    theme_Publication()
}

# Gene order is fixed once; InteractionScores is not modified inside the loop.
gene_order <- unique(InteractionScores$OrfRep)
for(i in 1:num_genes){
  Gene_Sel <- gene_order[i]
  X_ZCalculations <- X_stats_interaction_ALL[X_stats_interaction_ALL$OrfRep == Gene_Sel,]
  X_Int_Scores <- InteractionScores[InteractionScores$OrfRep == Gene_Sel,]

  p_l[[i]] <- delta_panel(
    ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)),
    geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3),
    X_ZCalculations, X_Int_Scores, X_Int_Scores$Z_Shift_L, X_Int_Scores$Z_lm_L,
    y_lim = c(-65,65), text_y = c(45,25,-25,-35,-45),
    y_breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60))

  p_K[[i]] <- delta_panel(
    ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)),
    geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3),
    X_ZCalculations, X_Int_Scores, X_Int_Scores$Z_Shift_K, X_Int_Scores$Z_lm_K,
    y_lim = c(-65,65), text_y = c(45,25,-25,-35,-45),
    y_breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60))

  p_r[[i]] <- delta_panel(
    ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)),
    geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3),
    X_ZCalculations, X_Int_Scores, X_Int_Scores$Z_Shift_r, X_Int_Scores$Z_lm_r,
    y_lim = c(-0.65,0.65), text_y = c(0.45,0.25,-0.25,-0.35,-0.45),
    y_breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6))

  p_AUC[[i]] <- delta_panel(
    ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)),
    geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3),
    X_ZCalculations, X_Int_Scores, X_Int_Scores$Z_Shift_AUC, X_Int_Scores$Z_lm_AUC,
    y_lim = c(-6500,6500), text_y = c(4500,2500,-2500,-3500,-4500),
    y_breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000))

  # Re-assemble the per-concentration table in InteractionScores order.
  if(i == 1){
    X_stats_interaction_ALL_final <- X_ZCalculations
  }
  if(i > 1){
    X_stats_interaction_ALL_final <- rbind(X_stats_interaction_ALL_final,X_ZCalculations)
  }
}
print("Pass Int ggplot loop")
write.csv(X_stats_interaction_ALL_final,paste(outputpath,"ZScore_Calculations.csv",sep=""),row.names = FALSE)

# Empty panel used to pad incomplete pages.
Blank <- ggplot(X2_RF) + geom_blank()

pdf(paste(outputpath,"InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE)

# Per-concentration summary of X2_RF; the NG/DB/SM sums are printed on the scatter plots.
X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise,
                       mean_L = mean(l,na.rm=TRUE),
                       median_L = median(l,na.rm=TRUE),
                       max_L = max(l,na.rm=TRUE),
                       min_L = min(l,na.rm=TRUE),
                       sd_L = sd(l,na.rm=TRUE),
                       mean_K = mean(K,na.rm=TRUE),
                       median_K = median(K,na.rm=TRUE),
                       max_K = max(K,na.rm=TRUE),
                       min_K = min(K,na.rm=TRUE),
                       sd_K = sd(K,na.rm=TRUE),
                       mean_r = mean(r,na.rm=TRUE),
                       median_r = median(r,na.rm=TRUE),
                       max_r = max(r,na.rm=TRUE),
                       min_r = min(r,na.rm=TRUE),
                       sd_r = sd(r,na.rm=TRUE),
                       mean_AUC = mean(AUC,na.rm=TRUE),
                       median_AUC = median(AUC,na.rm=TRUE),
                       max_AUC = max(AUC,na.rm=TRUE),
                       min_AUC = min(AUC,na.rm=TRUE),
                       sd_AUC = sd(AUC,na.rm=TRUE),
                       NG = sum(NG,na.rm=TRUE),
                       DB = sum(DB,na.rm=TRUE),
                       SM = sum(SM,na.rm=TRUE)
)

# Jittered scatter of one X2_RF measure with mean +/- sd in red and the
# NG/DB/SM counts per concentration printed at the three y positions in count_y.
rf_scatter <- function(base, label, y_lim, count_y) {
  conc <- unique(X2_RF$Conc_Num_Factor)
  base + geom_point(position="jitter",size=1) +
    stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
                 geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") +
    scale_x_continuous(name = unique(X$Drug[1]),breaks = conc,labels = as.character(unique(X2_RF$Conc_Num))) +
    ggtitle(paste(s,paste0("Scatter RF for ",label," with SD"), sep = " ")) + coord_cartesian(ylim=y_lim) +
    annotate("text",x=-0.25,y=count_y[1],label="NG") +
    annotate("text",x=-0.25,y=count_y[2],label="DB") +
    annotate("text",x=-0.25,y=count_y[3],label="SM") +
    annotate("text",x=conc,y=count_y[1],label=X_stats_X2_RF$NG) +
    annotate("text",x=conc,y=count_y[2],label=X_stats_X2_RF$DB) +
    annotate("text",x=conc,y=count_y[3],label=X_stats_X2_RF$SM) +
    theme_Publication()
}
# Box plot of one X2_RF measure per concentration (titles keep the original "Scatter" wording).
rf_box <- function(base, label, y_lim) {
  base + geom_boxplot() +
    scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
    ggtitle(paste(s,paste0("Scatter RF for ",label," with SD"), sep = " ")) + coord_cartesian(ylim=y_lim) +
    theme_Publication()
}

L_Stats <- rf_scatter(ggplot(X2_RF,aes(Conc_Num_Factor,l)), "L", c(0,160), c(10,5,0))
K_Stats <- rf_scatter(ggplot(X2_RF,aes(Conc_Num_Factor,K)), "K", c(-20,160), c(-5,-12.5,-20))
R_Stats <- rf_scatter(ggplot(X2_RF,aes(Conc_Num_Factor,r)), "r", c(0,1), c(.9,.8,.7))
AUC_Stats <- rf_scatter(ggplot(X2_RF,aes(Conc_Num_Factor,AUC)), "AUC", c(0,12500), c(11000,10000,9000))

L_Stats_Box <- rf_box(ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)), "L", c(0,160))
K_Stats_Box <- rf_box(ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)), "K", c(0,130))
r_Stats_Box <- rf_box(ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)), "r", c(0,1))
AUC_Stats_Box <- rf_box(ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)), "AUC", c(0,12500))

grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2)
grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2)

#plot the references
#grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4)
#grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2)

#loop for grid arrange 4x3
# Full pages of three genes (one row of L, K, r, AUC per gene). The loop draws
# round(num_genes/3)-1 pages as before and leaves `num` at the first gene of
# the last page drawn; the code that follows prints whatever remains.
# FIX: the old `1:(round(num_genes/3)-1)` counted down (1, 0) when num_genes
# was 3 or 4 and failed on j[0]; at least one page is now drawn, padded with
# Blank panels when fewer than three genes exist.
full_pages <- max(1, round(num_genes/3) - 1)
num <- 0
for(m in seq_len(full_pages)){
  num <- (m - 1)*3 + 1
  cells <- list()
  for(g in num:min(num + 2, num_genes)){
    cells <- c(cells, list(p_l[[g]],p_K[[g]],p_r[[g]],p_AUC[[g]]))
  }
  while(length(cells) < 12){
    cells <- c(cells, list(Blank))
  }
  do.call(grid.arrange, c(cells, list(ncol=4,nrow=3)))
}
# Print the genes not covered by the full-page loop above. `num` is the first
# gene index of the last full page, so genes num+3 .. num_genes remain. They
# are laid out three per page and padded with Blank panels.
# This replaces the hard-coded total_num == 1..5 cases (same pages for those
# counts) and no longer silently skips genes for any other positive count.
if(num_genes != (num+2)){
  total_num = num_genes - (num+2)
  if(total_num >= 1){
    leftover <- (num+3):num_genes
    for(first in seq(1, total_num, by = 3)){
      cells <- list()
      for(g in leftover[first:min(first + 2, total_num)]){
        cells <- c(cells, list(p_l[[g]],p_K[[g]],p_r[[g]],p_AUC[[g]]))
      }
      while(length(cells) < 12){
        cells <- c(cells, list(Blank))
      }
      do.call(grid.arrange, c(cells, list(ncol=4,nrow=3)))
    }
  }
}
dev.off()



pdf(paste(outputpath,"RF_InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE)

# Per-concentration summary of X2_RF; the NG/DB/SM sums are printed on the scatter plots.
X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise,
                       mean_L = mean(l,na.rm=TRUE),
                       median_L = median(l,na.rm=TRUE),
                       max_L = max(l,na.rm=TRUE),
                       min_L = min(l,na.rm=TRUE),
                       sd_L = sd(l,na.rm=TRUE),
                       mean_K = mean(K,na.rm=TRUE),
                       median_K = median(K,na.rm=TRUE),
                       max_K = max(K,na.rm=TRUE),
                       min_K = min(K,na.rm=TRUE),
                       sd_K = sd(K,na.rm=TRUE),
                       mean_r = mean(r,na.rm=TRUE),
                       median_r = median(r,na.rm=TRUE),
                       max_r = max(r,na.rm=TRUE),
                       min_r = min(r,na.rm=TRUE),
                       sd_r = sd(r,na.rm=TRUE),
                       mean_AUC = mean(AUC,na.rm=TRUE),
                       median_AUC = median(AUC,na.rm=TRUE),
                       max_AUC = max(AUC,na.rm=TRUE),
                       min_AUC = min(AUC,na.rm=TRUE),
                       sd_AUC = sd(AUC,na.rm=TRUE),
                       NG = sum(NG,na.rm=TRUE),
                       DB = sum(DB,na.rm=TRUE),
                       SM = sum(SM,na.rm=TRUE)
)

# Jittered scatter of one X2_RF measure with mean +/- sd in red and the
# NG/DB/SM counts per concentration printed at the three y positions in count_y.
rf_scatter <- function(base, label, y_lim, count_y) {
  conc <- unique(X2_RF$Conc_Num_Factor)
  base + geom_point(position="jitter",size=1) +
    stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
                 geom = "errorbar",color="red") + stat_summary(fun.y = mean, geom = "point",color="red") +
    scale_x_continuous(name = unique(X$Drug[1]),breaks = conc,labels = as.character(unique(X2_RF$Conc_Num))) +
    ggtitle(paste(s,paste0("Scatter RF for ",label," with SD"), sep = " ")) + coord_cartesian(ylim=y_lim) +
    annotate("text",x=-0.25,y=count_y[1],label="NG") +
    annotate("text",x=-0.25,y=count_y[2],label="DB") +
    annotate("text",x=-0.25,y=count_y[3],label="SM") +
    annotate("text",x=conc,y=count_y[1],label=X_stats_X2_RF$NG) +
    annotate("text",x=conc,y=count_y[2],label=X_stats_X2_RF$DB) +
    annotate("text",x=conc,y=count_y[3],label=X_stats_X2_RF$SM) +
    theme_Publication()
}
# Box plot of one X2_RF measure per concentration (titles keep the original "Scatter" wording).
rf_box <- function(base, label, y_lim) {
  base + geom_boxplot() +
    scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
    ggtitle(paste(s,paste0("Scatter RF for ",label," with SD"), sep = " ")) + coord_cartesian(ylim=y_lim) +
    theme_Publication()
}

L_Stats <- rf_scatter(ggplot(X2_RF,aes(Conc_Num_Factor,l)), "L", c(0,130), c(10,5,0))
K_Stats <- rf_scatter(ggplot(X2_RF,aes(Conc_Num_Factor,K)), "K", c(-20,160), c(-5,-12.5,-20))
R_Stats <- rf_scatter(ggplot(X2_RF,aes(Conc_Num_Factor,r)), "r", c(0,1), c(.9,.8,.7))
AUC_Stats <- rf_scatter(ggplot(X2_RF,aes(Conc_Num_Factor,AUC)), "AUC", c(0,12500), c(11000,10000,9000))

L_Stats_Box <- rf_box(ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),l)), "L", c(0,130))
K_Stats_Box <- rf_box(ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),K)), "K", c(0,160))
r_Stats_Box <- rf_box(ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),r)), "r", c(0,1))
# FIX: limits were written high-to-low as c(12000,0), unlike every other plot;
# now ascending. NOTE(review): the other AUC plots use an upper limit of 12500.
AUC_Stats_Box <- rf_box(ggplot(X2_RF,aes(as.factor(Conc_Num_Factor),AUC)), "AUC", c(0,12000))

grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2)
grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2)



#plot the references
# ---------------------------------------------------------------------------
# Reference plot pages
# Lay out the per-gene reference plots (p_rf_l, p_rf_K, p_rf_r, p_rf_AUC) on
# the currently open graphics device: one gene per row (L, K, r, AUC), three
# genes per page on a 4-column x 3-row grid. Rows left over on the last page
# are filled with Blank so every page keeps the same geometry.
# Chunking the gene indices handles any num_genes_RF, including fewer genes
# than one full page (0 genes simply prints no page).
# ---------------------------------------------------------------------------
genes_per_page <- 3
plots_per_gene <- 4
gene_pages <- split(seq_len(num_genes_RF),
                    ceiling(seq_len(num_genes_RF) / genes_per_page))

for (page_genes in gene_pages) {
  # collect the four plots of every gene on this page, gene by gene
  page_grobs <- list()
  for (g in page_genes) {
    page_grobs <- c(page_grobs,
                    list(p_rf_l[[g]], p_rf_K[[g]], p_rf_r[[g]], p_rf_AUC[[g]]))
  }
  # pad an incomplete final page with blank panels
  n_pad <- genes_per_page * plots_per_gene - length(page_grobs)
  page_grobs <- c(page_grobs, rep(list(Blank), n_pad))
  # same call shape as grid.arrange(plot1, plot2, ..., ncol=4, nrow=3)
  do.call(grid.arrange, c(page_grobs, list(ncol = 4, nrow = 3)))
}
dev.off()

# ---------------------------------------------------------------------------
# Rank plots for L and K gene interactions: prepare the data
# Missing average Z scores are replaced by 0.001 so that every gene receives a
# rank. The replacement is done on the column vector (not on a row subset of
# the data frame) so it is a no-op, rather than an error, when a column
# contains no missing values.
# ---------------------------------------------------------------------------
InteractionScores_AdjustMissing <- InteractionScores
for (score_col in c("Avg_Zscore_L", "Avg_Zscore_K", "Avg_Zscore_r", "Avg_Zscore_AUC")) {
  missing_score <- is.na(InteractionScores_AdjustMissing[[score_col]])
  InteractionScores_AdjustMissing[[score_col]][missing_score] <- 0.001
}

# rank() defaults: ties share their average rank
InteractionScores_AdjustMissing$L_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_L)
InteractionScores_AdjustMissing$K_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_K)
InteractionScores_AdjustMissing$r_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_r)
InteractionScores_AdjustMissing$AUC_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_AUC)

#
# ---------------------------------------------------------------------------
# Interaction (lm) Z scores: replace missing values by 0.001 and rank them.
# Assigning into the column vector keeps this a no-op (instead of an error)
# when a column has no missing values.
# ---------------------------------------------------------------------------
for (score_col in c("Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC")) {
  missing_score <- is.na(InteractionScores_AdjustMissing[[score_col]])
  InteractionScores_AdjustMissing[[score_col]][missing_score] <- 0.001
}

InteractionScores_AdjustMissing$L_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_L)
InteractionScores_AdjustMissing$K_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_K)
InteractionScores_AdjustMissing$r_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_r)
InteractionScores_AdjustMissing$AUC_Rank_lm <- rank(InteractionScores_AdjustMissing$Z_lm_AUC)

# ---------------------------------------------------------------------------
# make_rank_plot: Z score vs. rank scatter with the +/- n_sd bands shaded.
#   df          data frame holding the rank and score columns
#   cpp         parameter letter used in column names and labels ("L" or "K")
#   n_sd        cutoff in SD units; bands are drawn above n_sd and below -n_sd
#   score_type  "avg" -> columns <cpp>_Rank / Avg_Zscore_<cpp>
#               "lm"  -> columns <cpp>_Rank_lm / Z_lm_<cpp>
#   show_counts add text annotations with the number of genes beyond each cutoff
# Returns a ggplot object.
# Labelling convention kept from the original plots: for L the scores >= n_sd
# are counted as "Deletion Enhancers" and those <= -n_sd as "Deletion
# Suppressors"; for every other cpp (K here) the two labels are swapped.
# ---------------------------------------------------------------------------
make_rank_plot <- function(df, cpp, n_sd, score_type = c("avg", "lm"), show_counts = TRUE) {
  score_type <- match.arg(score_type)
  if (score_type == "avg") {
    rank_col <- paste0(cpp, "_Rank")
    score_col <- paste0("Avg_Zscore_", cpp)
    title_word <- "Average"
    axis_word <- "Avg"
  } else {
    rank_col <- paste0(cpp, "_Rank_lm")
    score_col <- paste0("Z_lm_", cpp)
    title_word <- "Interaction"
    axis_word <- "Int"
  }

  rank_plot <- ggplot(df, aes(.data[[rank_col]], .data[[score_col]])) +
    ggtitle(paste0(title_word, " Z score vs. Rank for ", cpp, " above ", n_sd, "SD")) +
    xlab("Rank") + ylab(paste(axis_word, "Z score", cpp)) +
    annotate("rect", xmin = -Inf, xmax = Inf, ymin = n_sd, ymax = Inf, fill = "#542788", alpha = 0.3) +
    annotate("rect", xmin = -Inf, xmax = Inf, ymin = -n_sd, ymax = -Inf, fill = "orange", alpha = 0.3) +
    geom_hline(yintercept = c(-n_sd, n_sd)) + geom_point(size = 0.1, shape = 3)

  if (show_counts) {
    scores <- df[[score_col]]
    n_high <- sum(scores >= n_sd, na.rm = TRUE)   # genes at or above +n_sd
    n_low <- sum(scores <= -n_sd, na.rm = TRUE)   # genes at or below -n_sd
    if (cpp == "L") {
      high_label <- "Deletion Enhancers ="
      low_label <- "Deletion Suppressors ="
    } else {
      high_label <- "Deletion Suppressors ="
      low_label <- "Deletion Enhancers ="
    }
    x_mid <- nrow(df) / 2   # centre the text horizontally over the ranks
    rank_plot <- rank_plot +
      annotate("text", x = x_mid, y = 10, label = paste(high_label, n_high)) +
      annotate("text", x = x_mid, y = -10, label = paste(low_label, n_low))
  }

  rank_plot + theme_Publication()
}

# --- Average Z score rank plots (with and without count annotations) --------
Rank_L_1SD <- make_rank_plot(InteractionScores_AdjustMissing, "L", 1, "avg")
Rank_L_2SD <- make_rank_plot(InteractionScores_AdjustMissing, "L", 2, "avg")
Rank_L_3SD <- make_rank_plot(InteractionScores_AdjustMissing, "L", 3, "avg")

Rank_K_1SD <- make_rank_plot(InteractionScores_AdjustMissing, "K", 1, "avg")
Rank_K_2SD <- make_rank_plot(InteractionScores_AdjustMissing, "K", 2, "avg")
Rank_K_3SD <- make_rank_plot(InteractionScores_AdjustMissing, "K", 3, "avg")

Rank_L_1SD_notext <- make_rank_plot(InteractionScores_AdjustMissing, "L", 1, "avg", show_counts = FALSE)
Rank_L_2SD_notext <- make_rank_plot(InteractionScores_AdjustMissing, "L", 2, "avg", show_counts = FALSE)
Rank_L_3SD_notext <- make_rank_plot(InteractionScores_AdjustMissing, "L", 3, "avg", show_counts = FALSE)

Rank_K_1SD_notext <- make_rank_plot(InteractionScores_AdjustMissing, "K", 1, "avg", show_counts = FALSE)
Rank_K_2SD_notext <- make_rank_plot(InteractionScores_AdjustMissing, "K", 2, "avg", show_counts = FALSE)
Rank_K_3SD_notext <- make_rank_plot(InteractionScores_AdjustMissing, "K", 3, "avg", show_counts = FALSE)

pdf(paste(outputpath,"RankPlots.pdf",sep=""),width = 18, height = 12, onefile = TRUE)

grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2)
grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2)

dev.off()

# --- Interaction (lm) Z score rank plots -------------------------------------
Rank_L_1SD_lm <- make_rank_plot(InteractionScores_AdjustMissing, "L", 1, "lm")
Rank_L_2SD_lm <- make_rank_plot(InteractionScores_AdjustMissing, "L", 2, "lm")
Rank_L_3SD_lm <- make_rank_plot(InteractionScores_AdjustMissing, "L", 3, "lm")

Rank_K_1SD_lm <- make_rank_plot(InteractionScores_AdjustMissing, "K", 1, "lm")
Rank_K_2SD_lm <- make_rank_plot(InteractionScores_AdjustMissing, "K", 2, "lm")
Rank_K_3SD_lm <- make_rank_plot(InteractionScores_AdjustMissing, "K", 3, "lm")

Rank_L_1SD_notext_lm <- make_rank_plot(InteractionScores_AdjustMissing, "L", 1, "lm", show_counts = FALSE)
Rank_L_2SD_notext_lm <- make_rank_plot(InteractionScores_AdjustMissing, "L", 2, "lm", show_counts = FALSE)
Rank_L_3SD_notext_lm <- make_rank_plot(InteractionScores_AdjustMissing, "L", 3, "lm", show_counts = FALSE)

Rank_K_1SD_notext_lm <- make_rank_plot(InteractionScores_AdjustMissing, "K", 1, "lm", show_counts = FALSE)
Rank_K_2SD_notext_lm <- make_rank_plot(InteractionScores_AdjustMissing, "K", 2, "lm", show_counts = FALSE)
Rank_K_3SD_notext_lm <- make_rank_plot(InteractionScores_AdjustMissing, "K", 3, "lm", show_counts = FALSE)

pdf(paste(outputpath,"RankPlots_lm.pdf",sep=""),width = 18, height = 12, onefile = TRUE)

grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2)
grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2)

dev.off()

# ---------------------------------------------------------------------------
# Compare the two scoring methods on the un-imputed scores.
# NOTE(review): the filter keeps a row when EITHER L score is present, so rows
# with exactly one missing score remain in X_NArm; confirm whether "&" (both
# present) was intended. The filter is left unchanged here.
# ---------------------------------------------------------------------------
X_NArm <- InteractionScores[!is.na(InteractionScores$Z_lm_L) | !is.na(InteractionScores$Avg_Zscore_L) ,]

#find overlaps
# which() drops NA comparisons and tolerates an empty match, so each label is
# applied to exactly the rows that satisfy its condition; assignments that
# match nothing are no-ops. Later assignments intentionally override earlier
# ones (e.g. the two "opposite direction" labels at the end), so the order
# below must be preserved.
X_NArm$Overlap <- "No Effect"
overlap_lm_z <- X_NArm$Z_lm_L
overlap_avg_z <- X_NArm$Avg_Zscore_L
X_NArm$Overlap[which(overlap_lm_z >= 2 & overlap_avg_z >= 2)] <- "Deletion Enhancer Both"
X_NArm$Overlap[which(overlap_lm_z <= -2 & overlap_avg_z <= -2)] <- "Deletion Suppressor Both"
X_NArm$Overlap[which(overlap_lm_z >= 2 & overlap_avg_z <= 2)] <- "Deletion Enhancer lm only"
X_NArm$Overlap[which(overlap_lm_z <= 2 & overlap_avg_z >= 2)] <- "Deletion Enhancer Avg Zscore only"
X_NArm$Overlap[which(overlap_lm_z <= -2 & overlap_avg_z >= -2)] <- "Deletion Suppressor lm only"
X_NArm$Overlap[which(overlap_lm_z >= -2 & overlap_avg_z <= -2)] <- "Deletion Suppressor Avg Zscore only"
X_NArm$Overlap[which(overlap_lm_z >= 2 & overlap_avg_z <= -2)] <- "Deletion Enhancer lm, Deletion Suppressor Avg Z score"
X_NArm$Overlap[which(overlap_lm_z <= -2 & overlap_avg_z >= 2)] <- "Deletion Suppressor lm, Deletion Enhancer Avg Z score"

#get the linear model info and the r-squared value for all CPPs in results 1 vs results 2
get_lm_L <- lm(X_NArm$Z_lm_L~X_NArm$Avg_Zscore_L)
L_lm <- summary(get_lm_L)

get_lm_K <- lm(X_NArm$Z_lm_K~X_NArm$Avg_Zscore_K)
+ K_lm <- summary(get_lm_K) + + get_lm_r <- lm(X_NArm$Z_lm_r~X_NArm$Avg_Zscore_r) + r_lm <- summary(get_lm_r) + + get_lm_AUC <- lm(X_NArm$Z_lm_AUC~X_NArm$Avg_Zscore_AUC) + AUC_lm <- summary(get_lm_AUC) + + pdf(paste(outputpath,"Avg_Zscore_vs_lm_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) + + print(ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Avg Zscore vs lm L") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(Avg_Zscore_K,Z_lm_K)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Avg Zscore vs lm K") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(K_lm$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(Avg_Zscore_r,Z_lm_r)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Avg Zscore vs lm r") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(r_lm$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(Avg_Zscore_AUC,Z_lm_AUC)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Avg Zscore vs lm AUC") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(AUC_lm$r.squared,2))) + theme_Publication_legend_right()) + + dev.off() + + + lm_v_Zscore_L <- 
ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L,ORF=OrfRep,Gene=Gene,NG=NG,SM=SM,DB=DB)) + geom_point(aes(color=Overlap),shape=3) + + geom_smooth(method = "lm",color=1) + ggtitle("Avg Zscore vs lm L") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right() + + pgg <- ggplotly(lm_v_Zscore_L) + #pgg + plotly_path <- paste(getwd(),"/",outputpath,"Avg_Zscore_vs_lm_NA_rm.html",sep="") + saveWidget(pgg, file=plotly_path, selfcontained =TRUE) + + X_NArm$L_Rank <- rank(X_NArm$Avg_Zscore_L) + X_NArm$K_Rank <- rank(X_NArm$Avg_Zscore_K) + X_NArm$r_Rank <- rank(X_NArm$Avg_Zscore_r) + X_NArm$AUC_Rank <- rank(X_NArm$Avg_Zscore_AUC) + + X_NArm$L_Rank_lm <- rank(X_NArm$Z_lm_L) + X_NArm$K_Rank_lm <- rank(X_NArm$Z_lm_K) + X_NArm$r_Rank_lm <- rank(X_NArm$Z_lm_r) + X_NArm$AUC_Rank_lm <- rank(X_NArm$Z_lm_AUC) + + #get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 + get_lm_L2 <- lm(X_NArm$L_Rank_lm~X_NArm$L_Rank) + L_lm2 <- summary(get_lm_L2) + + get_lm_K2 <- lm(X_NArm$K_Rank_lm~X_NArm$K_Rank) + K_lm2 <- summary(get_lm_K2) + + get_lm_r2 <- lm(X_NArm$r_Rank_lm~X_NArm$r_Rank) + r_lm2 <- summary(get_lm_r2) + + get_lm_AUC2 <- lm(X_NArm$AUC_Rank_lm~X_NArm$AUC_Rank) + AUC_lm2 <- summary(get_lm_AUC2) + + num_genes_NArm2 <- (dim(X_NArm)[1])/2 + + pdf(paste(outputpath,"Avg_Zscore_vs_lm_ranked_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) + + print(ggplot(X_NArm,aes(L_Rank,L_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Rank Avg Zscore vs lm L") + + annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(L_lm2$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(K_Rank,K_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + 
ggtitle("Rank Avg Zscore vs lm K") + + annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(K_lm2$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(r_Rank,r_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Rank Avg Zscore vs lm r") + + annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(r_lm2$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(AUC_Rank,AUC_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Rank of Avg Zscore vs lm AUC") + + annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(AUC_lm2$r.squared,2))) + theme_Publication_legend_right()) + + + + dev.off() + + + + Rank_L_1SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 1,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -1,])[1])) + + theme_Publication() + + Rank_L_2SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. 
Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 2,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -2,])[1])) + + theme_Publication() + + Rank_L_3SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 3,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -3,])[1])) + + theme_Publication() + + + Rank_K_1SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. 
Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -1,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 1,])[1])) + + theme_Publication() + + Rank_K_2SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -2,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 2,])[1])) + + theme_Publication() + + Rank_K_3SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -3,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 3,])[1])) + + theme_Publication() + + + Rank_L_1SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_L_2SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_L_3SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. 
Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + + Rank_K_1SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_K_2SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_K_3SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + + pdf(paste(outputpath,"RankPlots_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) + + grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2) + grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2) + + dev.off() + + + Rank_L_1SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 1,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -1,])[1])) + + theme_Publication() + + Rank_L_2SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. 
Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 2,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -2,])[1])) + + theme_Publication() + + Rank_L_3SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 3,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -3,])[1])) + + theme_Publication() + + + Rank_K_1SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. 
Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -1,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 1,])[1])) + + theme_Publication() + + Rank_K_2SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -2,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 2,])[1])) + + theme_Publication() + + Rank_K_3SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -3,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 3,])[1])) + + theme_Publication() + + + Rank_L_1SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_L_2SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_L_3SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. 
Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + + Rank_K_1SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_K_2SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_K_3SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + pdf(paste(outputpath,"RankPlots_lm_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) + + grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2) + grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2) + + dev.off() + +} + + + +#get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 +get_lm_1 <- lm(X_NArm$Z_lm_K~X_NArm$Z_lm_L) +L_lm_1 <- summary(get_lm_1) + +get_lm_2 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_L) +L_lm_2 <- summary(get_lm_2) + +get_lm_3 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_L) +L_lm_3 <- summary(get_lm_3) + +get_lm_4 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_K) +L_lm_4 <- summary(get_lm_4) + +get_lm_5 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_K) +L_lm_5 <- summary(get_lm_5) + +get_lm_6 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_r) +L_lm_6 <- summary(get_lm_6) + + +pdf(file=paste(outputpath,"Correlation_CPPs.pdf",sep=""),width = 10, height = 7, onefile = TRUE) + +ggplot(X_NArm,aes(Z_lm_L,Z_lm_K)) + geom_point(shape=3,color="gray70") + + geom_smooth(method="lm",color="tomato3") + + ggtitle("Interaction L vs. 
Interaction K") + + xlab("z-score L") + ylab("z-score K") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_1$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_L,Z_lm_r)) + geom_point(shape=3,color="gray70") + + geom_smooth(method="lm",color="tomato3") + + ggtitle("Interaction L vs. Interaction r") + + xlab("z-score L") + ylab("z-score r") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_2$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_L,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + + geom_smooth(method="lm",color="tomato3") + + ggtitle("Interaction L vs. Interaction AUC") + + xlab("z-score L") + ylab("z-score AUC") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_3$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_K,Z_lm_r)) + geom_point(shape=3,color="gray70") + + geom_smooth(method="lm",color="tomato3") + + ggtitle("Interaction K vs. 
Interaction r") + + xlab("z-score K") + ylab("z-score r") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_4$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_K,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + + geom_smooth(method="lm",color="tomato3") + + ggtitle("Interaction K vs. Interaction AUC") + + xlab("z-score K") + ylab("z-score AUC") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_5$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_r,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + + geom_smooth(method="lm",color="tomato3") + + ggtitle("Interaction r vs. Interaction AUC") + + xlab("z-score r") + ylab("z-score AUC") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_6$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +InteractionScores_RF2 <- InteractionScores_RF[!is.na(InteractionScores_RF$Z_lm_L),] +ggplot(X_NArm,aes(Z_lm_L,Z_lm_K)) + geom_point(shape=3,color="gray70") + + geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_K),color="cyan") + + ggtitle("Interaction L vs. 
Interaction K") + + xlab("z-score L") + ylab("z-score K") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_1$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_L,Z_lm_r)) + geom_point(shape=3,color="gray70") + + geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_r),color="cyan") + + ggtitle("Interaction L vs. Interaction r") + + xlab("z-score L") + ylab("z-score r") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_2$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_L,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + + geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_AUC),color="cyan") + + ggtitle("Interaction L vs. Interaction AUC") + + xlab("z-score L") + ylab("z-score AUC") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_3$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_K,Z_lm_r)) + geom_point(shape=3,color="gray70") + + geom_point(data=InteractionScores_RF2,aes(Z_lm_K,Z_lm_r),color="cyan") + + ggtitle("Interaction K vs. 
Interaction r") + + xlab("z-score K") + ylab("z-score r") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_4$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_K,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + + geom_point(data=InteractionScores_RF2,aes(Z_lm_K,Z_lm_AUC),color="cyan") + + ggtitle("Interaction K vs. Interaction AUC") + + xlab("z-score K") + ylab("z-score AUC") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_5$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_r,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + + geom_point(data=InteractionScores_RF2,aes(Z_lm_r,Z_lm_AUC),color="cyan") + + ggtitle("Interaction r vs. Interaction AUC") + + xlab("z-score r") + ylab("z-score AUC") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_6$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + + + + + +dev.off() + + +#write.csv(Labels,file=paste("../Code/Parameters.csv"),row.names = FALSE) +timestamp() + +#BoneYard*********************************************** +#I'm thinking this parameter needs to be save somewhere "permanent' for the record so outputs can be reproduced. +#take this out of the Arguments. In Matlab I could for future in .mat file. 
Maybe I could save the SD Args[2] as part of the StudyInfo.txt. +#Corruptable but better than nothing. +#if(is.na(Args[2])){ +# std=3 +#}else { +# std= Arg[2] +#Delta_Background_sdFactor <- 2 #Args[3] +#DelBGFactr <- as.numeric(Delta_Background_sdFactor) +#} + diff --git a/workflow/.old/templates/qhtcp/Exp4/backups/InteractTemplateB4Prompt4SDinput.R b/workflow/.old/templates/qhtcp/Exp4/backups/InteractTemplateB4Prompt4SDinput.R new file mode 100644 index 00000000..3b11777c --- /dev/null +++ b/workflow/.old/templates/qhtcp/Exp4/backups/InteractTemplateB4Prompt4SDinput.R @@ -0,0 +1,2702 @@ +#Based on InteractionTemplate.R which is based on Sean Santose's Interaction_V5 script. +#Adapt SS For Structured Data storage but using command line scripts +###Set up the required libraries, call required plot theme elements and set up the command line arguments +library("ggplot2") +library("plyr") +library("extrafont") +library("gridExtra") +library("gplots") +library("RColorBrewer") +library("stringr") +#library("gdata") +library(plotly) +library(htmlwidgets) + +Args <- commandArgs(TRUE) +input_file <- Args[1] #"!!Results_17_0827_yor1null-rpl12anull misLabeledAsFrom MI 17_0919_yor1-curated.txt" #Args[1] #Arg 1 #"!!ResultsStd_JS 19_1224_HLEG_P53.txt" is the !!results ... 
.txt + +#Path to Output Directory +W=getwd() #R is F'd up, Can't use, Any legitamate platfold could build out dirs from this +outDir <- "ZScores/" #"Args[2] #paste0(W,"/ZScores/") +subDir <- outDir #Args[2] + +if (file.exists(subDir)){ + outputpath <- subDir +} else { + dir.create(file.path(subDir)) +} + +if (file.exists(paste(subDir,"QC/",sep=""))){ + outputpath_QC <- paste(subDir,"QC/",sep="") +} else { + dir.create(file.path(paste(subDir,"QC/",sep=""))) + outputpath_QC <- paste(subDir,"QC/",sep="") +} +#define the output path (formerly the second argument from Rscript) +outputpath <- outDir + +#Set Args[2] the Background contamination noise filter as a function of standard deviation +#Sean recommends 3 or 5 SD factor. +#Capture Exp_ number,use it to Save Args[2]{std}to Labels field and then Write to Labels to studyInfo.txt for future reference +Labels <- read.csv(file= "../Code/StudyInfo.csv",stringsAsFactors = FALSE) #,sep= ",") +print("Be sure to include Argument 2 the Bacground noise filter standard deviation i.e., 3 or 5 per Sean") +std= as.numeric(Args[2]) +expNumber<- as.numeric(sub("^.*?(\\d+)$", "\\1", getwd())) +Labels[expNumber,3]= as.numeric(std) +Delta_Background_sdFactor <- std +DelBGFactr <- as.numeric(Delta_Background_sdFactor) +#Write Background SD value to studyInfo.txt file +#write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) +write.csv(Labels,file=paste("../Code/StudyInfo.csv"),row.names = FALSE) +print('ln 50 write StudyInfo.csv ') +#write.table(Labels,file=paste(outputpath,"StudyInfo.txt"),sep = "\t",row.names = FALSE) + +#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +#++++++BEGIN USER DATA SELECTION SECTION+++++++++++++++++++++++++++++++++++++++++++++++++ + + +#read in the data +X <- read.delim(input_file,skip=2,as.is=T,row.names=1,strip.white=TRUE) +X <- X[!(X[[1]]%in%c("","Scan")),] +#X <- X[!(X[[1]]%in%c(61:76)),] #Remove dAmp plates which are Scans 61 thru 76 + +#X <- 
X[which(X$Specifics == "WT"),] + +X_length <- length(X[1,]) +X_end <- length(X[1,]) - 2 +X <- X[,c(1:46,X_end:X_length)] + + +#use numeric data to perform operations +X$Col <- as.numeric(X$Col) +X$Row <- as.numeric(X$Row) +X$l <- as.numeric(X$l) +X$K <- as.numeric(X$K) +X$r <- as.numeric(X$r) +X$Scan <- as.numeric(X$Scan) +X$AUC <- as.numeric(X$AUC) +X$LstBackgrd <- as.numeric(X$LstBackgrd) +X$X1stBackgrd <- as.numeric(X$X1stBackgrd) + +#set the OrfRep to YDL227C for the ref data +X[X$ORF == "YDL227C",]$OrfRep <- "YDL227C" +#Sean removes the Doxycyclin at 0.0ug.mL so that only the Oligomycin series with Doxycyclin of 0.12ug/mL are used. +#That is the first DM plates are removed from the data set with the following. +X <- X[X$Conc1 != "0ug/mL",] #This occurs only for Exp1 and Exp2 and so doesn't have any effect on Exp3&4 + + +#Mert placed the"bad_spot" text in the ORF col. for particular spots in the RF1 and RF2 plates. +#This code removes those spots from the data set used for the interaction analysis. Dr.Hartman feels that these donot effect Zscores significantly and so "non-currated" files were used. 
+#try(X <- X[X$ORF != "bad_spot",])
+#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++=
+
+#Count the distinct drug-concentration labels present in the data
+#NOTE(review): the filter earlier in this script uses X$Conc1; X$Conc here relies on a
+#column named Conc (or on `$` partial matching) -- confirm against the results-file header
+Total_Conc_Nums <- length(unique(X$Conc))
+
+#numextract: pull the first numeric token out of a label that mixes characters and digits
+#  string - character vector of labels (e.g. a concentration with its unit suffix)
+#  returns - character vector of the matched text, NA where a label contains no digits
+numextract <- function(string){
+  str_extract(string, "\\-*\\d+\\.*\\d*")
+}
+
+#Numeric drug concentration parsed from the label
+X$Conc_Num <- as.numeric(numextract(X$Conc))
+#Zero-based index of each distinct concentration (factor levels are sorted ascending),
+#used later as the x position when graphing
+X$Conc_Num_Factor <- as.numeric(as.factor(X$Conc_Num)) - 1
+
+#Largest concentration index
+MAX_CONC <- max(X$Conc_Num_Factor)
+#if treating numbers not as factors uncomment next line and comment out previous line
+#MAX_CONC <- max(X$Conc_Num)
+
+#Remove blank wells so they are excluded from the graphs and the summary statistics.
+#A single %in% mask is used instead of chained `!=` subsets: `!=` yields NA for a missing
+#Gene/ORF, and indexing rows with NA inserts all-NA phantom rows into X. %in% never
+#returns NA, so rows with a missing Gene/ORF are kept unchanged; for data without
+#missing values the result is identical to the chained filters.
+blank_well <- X$Gene %in% c("BLANK","Blank","blank") | X$ORF %in% "Blank"
+X <- X[!blank_well,]
+#X <- X[X$Gene != "HO",]
+#Snapshot of X before the gene-name update that follows (restore point for testing)
+Xbu= X
+#Inserted to use the SGD gene list to update ORFs and replace empty geneName cells with the ORF name (adapted from Sean's Merge script). This 'fixes' the naming for everything that follows (REMc, Heatmaps, etc.) rather than doing it piecemeal later.
+#Sean's Match script (which was adapted here) was fixed 2022_0608 so as not to overwrite the RF1 & RF2 geneNames, which had caused a variance from his code's results
+#in the Z_lm_L, K, r & AUC output values. Values correlated well but were off by a multiplier factor.
+SGDgeneList= "../Code/SGD_features.tab" +genes = data.frame(read.delim(file=SGDgeneList,quote="",header=FALSE,colClasses = c(rep("NULL",3), rep("character", 2), rep("NULL", 11)))) +for(i in 1:length(X[,14])){ + ii= as.numeric(i) + line_num = match(X[ii,14],genes[,1],nomatch=1) + OrfRepColNum= as.numeric(match('OrfRep',names(X))) + if(X[ii,OrfRepColNum]!= "YDL227C"){ + X[ii,15] = genes[line_num,2] + } + if((X[ii,15] == "")||(X[ii,15] == "OCT1")){ + X[ii,15] = X[ii,OrfRepColNum] + } +} +Xblankreplace= X +#X= Xbu #for restore testing restore X if geneName 'Match' routine needs changing + +#Remove dAmPs ******************************* +DAmPs_List <- "../Code/22_0602_Remy_DAmPsList.txt" +Damps <- read.delim(DAmPs_List,header=F) + +X <- X[!(X$ORF %in% Damps$V1),] #fix this to Damps[,1] +XafterDampsRM=X #Backup for debugging especially when Rstudio goes crazy out of control +# *********** + + +#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +#++++++END USER DATA SELECTION+++++++++++++++++++++++++++++++++++++++++++++++++ +#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +print("ln137 End of User Section including blank gene writeOver") +#++++Begin Graphics Boiler Plate Section+++++++++++++++++++++++++++++++++++++++ +#theme elements for plots +theme_Publication <- function(base_size=14, base_family="sans") { + library(grid) + library(ggthemes) + (theme_foundation(base_size=base_size, base_family=base_family) + + theme(plot.title = element_text(face = "bold", + size = rel(1.2), hjust = 0.5), + text = element_text(), + panel.background = element_rect(colour = NA), + plot.background = element_rect(colour = NA), + panel.border = element_rect(colour = NA), + axis.title = element_text(face = "bold",size = rel(1)), + axis.title.y = element_text(angle=90,vjust =2), + axis.title.x = element_text(vjust = -0.2), + axis.text = element_text(), + axis.line = element_line(colour="black"), + axis.ticks = element_line(), + 
panel.grid.major = element_line(colour="#f0f0f0"), + panel.grid.minor = element_blank(), + legend.key = element_rect(colour = NA), + legend.position = "bottom", + legend.direction = "horizontal", + legend.key.size= unit(0.2, "cm"), + legend.spacing = unit(0, "cm"), + legend.title = element_text(face="italic"), + plot.margin=unit(c(10,5,5,5),"mm"), + strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"), + strip.text = element_text(face="bold") + )) + +} + +scale_fill_Publication <- function(...){ + library(scales) + discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) + +} + +scale_colour_Publication <- function(...){ + discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) + +} + + +theme_Publication_legend_right <- function(base_size=14, base_family="sans") { + (theme_foundation(base_size=base_size, base_family=base_family) + + theme(plot.title = element_text(face = "bold", + size = rel(1.2), hjust = 0.5), + text = element_text(), + panel.background = element_rect(colour = NA), + plot.background = element_rect(colour = NA), + panel.border = element_rect(colour = NA), + axis.title = element_text(face = "bold",size = rel(1)), + axis.title.y = element_text(angle=90,vjust =2), + axis.title.x = element_text(vjust = -0.2), + axis.text = element_text(), + axis.line = element_line(colour="black"), + axis.ticks = element_line(), + panel.grid.major = element_line(colour="#f0f0f0"), + panel.grid.minor = element_blank(), + legend.key = element_rect(colour = NA), + legend.position = "right", + legend.direction = "vertical", + legend.key.size= unit(0.5, "cm"), + legend.spacing = unit(0, "cm"), + legend.title = element_text(face="italic"), + plot.margin=unit(c(10,5,5,5),"mm"), + strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"), + strip.text = 
element_text(face="bold") + )) + +} + +scale_fill_Publication <- function(...){ + discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) + +} + +scale_colour_Publication <- function(...){ + discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) + +} + + +#print timestamp for initial time the code starts +timestamp() +#+++++BEGIN QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++ +###Part 2 - Quality control +#print quality control graphs for each dataset before removing data due to contamination +#and before adjusting missing data to max theoretical values + +#plate analysis plot +#plate analysis is a quality check to identify plate effects containing anomalies + +Plate_Analysis_L <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication() + +Plate_Analysis_K <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication() + +Plate_Analysis_r <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) + + stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x), + geom = "errorbar") + stat_summary(fun.y = mean, geom = "point",size=0.6) + + ggtitle("Plate analysis by Drug Conc for r before 
quality control") + theme_Publication()

# Shared "mean +/- 1 SD" overlay (error bar plus mean point) that every
# per-scan scatter plot in this section adds on top of its raw points.
plate_mean_sd_layers <- list(
  stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
               geom = "errorbar"),
  stat_summary(fun.y = mean, geom = "point",size=0.6)
)

Plate_Analysis_AUC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
  plate_mean_sd_layers +
  ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication()

# Box plots per scan, before QC
Plate_Analysis_L_Box <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() +
  ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_Publication()

Plate_Analysis_K_Box <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() +
  ggtitle("Plate analysis by Drug Conc for K before quality control") + theme_Publication()

Plate_Analysis_r_Box <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() +
  ggtitle("Plate analysis by Drug Conc for r before quality control") + theme_Publication()

Plate_Analysis_AUC_Box <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() +
  ggtitle("Plate analysis by Drug Conc for AUC before quality control") + theme_Publication()

#quality control - values with a high delta background likely have heavy contamination
#check the frequency of these values
#report the L and K values of these spots
#report the number to be removed based on the Delta_Background_Tolerance
X$Delta_Backgrd <- X$LstBackgrd - X$X1stBackgrd

#raw l vs K before QC
# ORF/Gene/Delta_Backgrd are passed as extra aesthetics; the plot is handed to
# ggplotly() below (presumably so they appear in the hover text - confirm).
Raw_l_vs_K_beforeQC <- ggplot(X,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) +
  ggtitle("Raw L vs K before QC") +
  theme_Publication_legend_right()
pdf(paste(outputpath_QC,"Raw_L_vs_K_beforeQC.pdf",sep=""),width = 12,height = 8)
Raw_l_vs_K_beforeQC
dev.off()
pgg <- ggplotly(Raw_l_vs_K_beforeQC)
#pgg
plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_beforeQC.html",sep="")
saveWidget(pgg, file=plotly_path, selfcontained =TRUE)

#set delta background tolerance to DelBGFactr standard deviations above the mean delta background
# na.rm=TRUE: a single missing background reading would otherwise make the
# tolerance NA and silently disable every QC step that follows.
Delta_Background_Tolerance <- mean(X$Delta_Backgrd,na.rm=TRUE)+(DelBGFactr*sd(X$Delta_Backgrd,na.rm=TRUE))
#Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(3*sd(X$Delta_Backgrd))
print(paste("Delta_Background_Tolerance is",Delta_Background_Tolerance,sep=" "))

# Row positions at or above the tolerance. which() drops NA comparisons, so the
# same index can be used for subsetting and for assignment without try().
Delta_Backgrd_above_Tolerance_rows <- which(X$Delta_Backgrd >= Delta_Background_Tolerance)

Plate_Analysis_Delta_Backgrd <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2,position="jitter") +
  plate_mean_sd_layers +
  ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication()

Plate_Analysis_Delta_Backgrd_Box <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() +
  ggtitle("Plate analysis by Drug Conc for Delta_Backgrd before quality control") + theme_Publication()

X_Delta_Backgrd_above_Tolerance <- X[Delta_Backgrd_above_Tolerance_rows,]

# Half-median L/K of the flagged spots is only used to position the annotation text.
X_Delta_Backgrd_above_Tolerance_K_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$K,na.rm = TRUE))/2
X_Delta_Backgrd_above_Tolerance_L_halfmedian <- (median(X_Delta_Backgrd_above_Tolerance$l,na.rm = TRUE))/2
X_Delta_Backgrd_above_Tolerance_toRemove <- dim(X_Delta_Backgrd_above_Tolerance)[1]

X_Delta_Backgrd_above_Tolerance_L_vs_K <- ggplot(X_Delta_Backgrd_above_Tolerance,aes(l,K,color=as.factor(Conc_Num))) + geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) +
  ggtitle(paste("Raw L vs K for strains above delta background threshold of",Delta_Background_Tolerance,"or above")) +
  annotate("text",x=X_Delta_Backgrd_above_Tolerance_L_halfmedian,y=X_Delta_Backgrd_above_Tolerance_K_halfmedian,
           label = paste("Strains above delta background tolerance = ",X_Delta_Backgrd_above_Tolerance_toRemove)) +
  theme_Publication_legend_right()
pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.pdf",sep=""),width = 12,height = 8)
X_Delta_Backgrd_above_Tolerance_L_vs_K
dev.off()
pgg <- ggplotly(X_Delta_Backgrd_above_Tolerance_L_vs_K)
#pgg
plotly_path <- paste(getwd(),"/",outputpath_QC,"Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.html",sep="")
saveWidget(pgg, file=plotly_path, selfcontained =TRUE)

#frequency plot for all data vs. the delta_background
DeltaBackground_Frequency_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_density() +
  ggtitle("Density plot for Delta Background by Conc All Data") + theme_Publication_legend_right()

#bar plot for all data vs. the delta_background
DeltaBackground_Bar_Plot <- ggplot(X,aes(Delta_Backgrd,color=as.factor(Conc_Num))) + geom_bar() +
  ggtitle("Bar plot for Delta Background by Conc All Data") + theme_Publication_legend_right()

pdf(file = paste(outputpath_QC,"Frequency_Delta_Background.pdf",sep=""),width = 12,height = 8)
print(DeltaBackground_Frequency_Plot)
print(DeltaBackground_Bar_Plot)
dev.off()


#Need to identify missing data, and differentiate between this data and removed data so the removed data can get set to NA and the missing data can get set to max theoretical values
#1 for missing data, 0 for non missing data
#Use "NG" for NoGrowth rather than "missing"
X$NG <- 0
X$NG[which(X$l == 0)] <- 1

#1 for removed data, 0 non removed data
#Use DB to identify number of genes removed due to the DeltaBackground Threshold rather than "Removed"
X$DB <- 0
X$DB[Delta_Backgrd_above_Tolerance_rows] <- 1

#replace the CPPs for l, r, AUC and K (must be last!) for removed data
# Indexing by position is a no-op when nothing exceeds the tolerance, so no
# error has to be swallowed.
X$l[Delta_Backgrd_above_Tolerance_rows] <- NA
X$r[Delta_Backgrd_above_Tolerance_rows] <- NA
X$AUC[Delta_Backgrd_above_Tolerance_rows] <- NA
X$K[Delta_Backgrd_above_Tolerance_rows] <- NA


# Same per-scan views rebuilt from X now that flagged spots are NA
Plate_Analysis_L_afterQC <- ggplot(X,aes(Scan,l,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
  plate_mean_sd_layers +
  ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication()

Plate_Analysis_K_afterQC <- ggplot(X,aes(Scan,K,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
  plate_mean_sd_layers +
  ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication()

Plate_Analysis_r_afterQC <- ggplot(X,aes(Scan,r,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
  plate_mean_sd_layers +
  ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication()

Plate_Analysis_AUC_afterQC <- ggplot(X,aes(Scan,AUC,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
  plate_mean_sd_layers +
  ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication()

Plate_Analysis_Delta_Backgrd_afterQC <- ggplot(X,aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) + geom_point(shape=3,size=0.2) +
  plate_mean_sd_layers +
  ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication()


Plate_Analysis_L_Box_afterQC <- ggplot(X,aes(as.factor(Scan),l,color=as.factor(Conc_Num))) + geom_boxplot() +
  ggtitle("Plate analysis by Drug Conc for L after quality control") + theme_Publication()

Plate_Analysis_K_Box_afterQC <- ggplot(X,aes(as.factor(Scan),K,color=as.factor(Conc_Num))) + geom_boxplot() +
  ggtitle("Plate analysis by Drug Conc for K after quality control") + theme_Publication()

Plate_Analysis_r_Box_afterQC <- ggplot(X,aes(as.factor(Scan),r,color=as.factor(Conc_Num))) + geom_boxplot() +
  ggtitle("Plate analysis by Drug Conc for r after quality control") + theme_Publication()

Plate_Analysis_AUC_Box_afterQC <- ggplot(X,aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) + geom_boxplot() +
  ggtitle("Plate analysis by Drug Conc for AUC after quality control") + theme_Publication()

Plate_Analysis_Delta_Backgrd_Box_afterQC <- ggplot(X,aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) + geom_boxplot() +
  ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") + theme_Publication()

#print the plate analysis data before and after QC
pdf(file=paste(outputpath_QC,"Plate_Analysis.pdf",sep=""),width = 14,height=9)
Plate_Analysis_L
Plate_Analysis_L_afterQC
Plate_Analysis_K
Plate_Analysis_K_afterQC
Plate_Analysis_r
Plate_Analysis_r_afterQC
Plate_Analysis_AUC
Plate_Analysis_AUC_afterQC
Plate_Analysis_Delta_Backgrd
Plate_Analysis_Delta_Backgrd_afterQC
dev.off()

#print the plate analysis box plots before and after QC
pdf(file=paste(outputpath_QC,"Plate_Analysis_Boxplots.pdf",sep=""),width = 18,height=9)
Plate_Analysis_L_Box
Plate_Analysis_L_Box_afterQC
Plate_Analysis_K_Box
Plate_Analysis_K_Box_afterQC
Plate_Analysis_r_Box
Plate_Analysis_r_Box_afterQC
Plate_Analysis_AUC_Box
Plate_Analysis_AUC_Box_afterQC
Plate_Analysis_Delta_Backgrd_Box
Plate_Analysis_Delta_Backgrd_Box_afterQC
dev.off()

# Repeat the after-QC plate views on the subset of spots with L > 0.
# The comparison is NA for spots whose L was blanked, so those rows are
# excluded as well.
X_noZero <- X[!is.na(X$l) & X$l > 0,]

# Raw points plus a mean +/- 1 SD error bar and a mean marker, shared by the
# five scatter plots below.
noZero_scatter_layers <- list(
  geom_point(shape=3,size=0.2),
  stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
               geom = "errorbar"),
  stat_summary(fun.y = mean, geom = "point",size=0.6)
)

Plate_Analysis_L_afterQC_Z <- ggplot(data = X_noZero, mapping = aes(Scan,l,color=as.factor(Conc_Num))) +
  noZero_scatter_layers +
  ggtitle("Plate analysis by Drug Conc for L after quality control") +
  theme_Publication()

Plate_Analysis_K_afterQC_Z <- ggplot(data = X_noZero, mapping = aes(Scan,K,color=as.factor(Conc_Num))) +
  noZero_scatter_layers +
  ggtitle("Plate analysis by Drug Conc for K after quality control") +
  theme_Publication()

Plate_Analysis_r_afterQC_Z <- ggplot(data = X_noZero, mapping = aes(Scan,r,color=as.factor(Conc_Num))) +
  noZero_scatter_layers +
  ggtitle("Plate analysis by Drug Conc for r after quality control") +
  theme_Publication()

Plate_Analysis_AUC_afterQC_Z <- ggplot(data = X_noZero, mapping = aes(Scan,AUC,color=as.factor(Conc_Num))) +
  noZero_scatter_layers +
  ggtitle("Plate analysis by Drug Conc for AUC after quality control") +
  theme_Publication()

Plate_Analysis_Delta_Backgrd_afterQC_Z <- ggplot(data = X_noZero, mapping = aes(Scan,Delta_Backgrd,color=as.factor(Conc_Num))) +
  noZero_scatter_layers +
  ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") +
  theme_Publication()

# Box plots per scan on the same subset
Plate_Analysis_L_Box_afterQC_Z <- ggplot(data = X_noZero, mapping = aes(as.factor(Scan),l,color=as.factor(Conc_Num))) +
  geom_boxplot() +
  ggtitle("Plate analysis by Drug Conc for L after quality control") +
  theme_Publication()

Plate_Analysis_K_Box_afterQC_Z <- ggplot(data = X_noZero, mapping = aes(as.factor(Scan),K,color=as.factor(Conc_Num))) +
  geom_boxplot() +
  ggtitle("Plate analysis by Drug Conc for K after quality control") +
  theme_Publication()

Plate_Analysis_r_Box_afterQC_Z <- ggplot(data = X_noZero, mapping = aes(as.factor(Scan),r,color=as.factor(Conc_Num))) +
  geom_boxplot() +
  ggtitle("Plate analysis by Drug Conc for r after quality control") +
  theme_Publication()

Plate_Analysis_AUC_Box_afterQC_Z <- ggplot(data = X_noZero, mapping = aes(as.factor(Scan),AUC,color=as.factor(Conc_Num))) +
  geom_boxplot() +
  ggtitle("Plate analysis by Drug Conc for AUC after quality control") +
  theme_Publication()

Plate_Analysis_Delta_Backgrd_Box_afterQC_Z <- ggplot(data = X_noZero, mapping = aes(as.factor(Scan),Delta_Backgrd,color=as.factor(Conc_Num))) +
  geom_boxplot() +
  ggtitle("Plate analysis by Drug Conc for Delta_Backgrd after quality control") +
  theme_Publication()

# One page per parameter: scatter views of the zero-removed data
pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros.pdf",sep=""),width = 14,height=9)
Plate_Analysis_L_afterQC_Z
Plate_Analysis_K_afterQC_Z
Plate_Analysis_r_afterQC_Z
Plate_Analysis_AUC_afterQC_Z
Plate_Analysis_Delta_Backgrd_afterQC_Z
dev.off()

# One page per parameter: box-plot views of the zero-removed data
pdf(file=paste(outputpath_QC,"Plate_Analysis_noZeros_Boxplots.pdf",sep=""),width = 18,height=9)
Plate_Analysis_L_Box_afterQC_Z
Plate_Analysis_K_Box_afterQC_Z
Plate_Analysis_r_Box_afterQC_Z
Plate_Analysis_AUC_Box_afterQC_Z
Plate_Analysis_Delta_Backgrd_Box_afterQC_Z
dev.off()

#remove dataset with zeros removed
rm(X_noZero)


#X_test_missing_and_removed <- X[X$Removed == 1,]

#calculate summary statistics for all strains, including both background and the deletions
# One row per drug concentration. N counts every row in the group (NA included);
# se_* divides by sqrt(N-1) as written in the original analysis.
X_stats_ALL <- ddply(X, c("Conc_Num","Conc_Num_Factor"), summarise,
                     N = (length(l)),
                     mean_L = mean(l,na.rm=TRUE),
                     median_L = median(l,na.rm=TRUE),
                     max_L = max(l,na.rm=TRUE),
                     min_L = min(l,na.rm=TRUE),
                     sd_L = sd(l,na.rm=TRUE),
                     se_L = sd_L / sqrt(N-1),
                     mean_K = mean(K,na.rm=TRUE),
                     median_K = median(K,na.rm=TRUE),
                     max_K = max(K,na.rm=TRUE),
                     min_K = min(K,na.rm=TRUE),
                     sd_K = sd(K,na.rm=TRUE),
                     se_K = sd_K / sqrt(N-1),
                     mean_r = mean(r,na.rm=TRUE),
                     median_r = median(r,na.rm=TRUE),
                     max_r = max(r,na.rm=TRUE),
                     min_r = min(r,na.rm=TRUE),
                     sd_r = sd(r,na.rm=TRUE),
                     se_r = sd_r / sqrt(N-1),
                     mean_AUC = mean(AUC,na.rm=TRUE),
                     median_AUC = median(AUC,na.rm=TRUE),
                     max_AUC = max(AUC,na.rm=TRUE),
                     min_AUC = min(AUC,na.rm=TRUE),
                     sd_AUC = sd(AUC,na.rm=TRUE),
                     se_AUC = sd_AUC / sqrt(N-1)
)
#print(X_stats_ALL_L)

# NOTE(review): paste() without sep="" inserts a space between outputpath and the
# file name. Left unchanged because other steps may already read that path - confirm.
write.csv(X_stats_ALL,file=paste(outputpath,"SummaryStats_ALLSTRAINS.csv"),row.names = FALSE)
#+++++END QC SECTION+++++++++++++++++++++++++++++++++++++++++++++++++++++++

##### Part 3 - Generate summary statistics and calculate the max theoretical L value
##### Calculate the Z score at each drug conc for each deletion strain


#get the background strains - can be modified to take another argument but for most screens will just be YDL227C
Background_Strains <- c("YDL227C")

#first part of loop will go through for each background strain
#most cases there will only be one YDL227C
for(s in Background_Strains){
  X_Background <- X[X$OrfRep == s,]

  #if there's missing data for the background strains set these values to NA so the 0 values aren't included in summary statistics
  #we may want to consider in some cases giving the max high value to L depending on the data type
  # NOTE(review): table(...)[1] is the COUNT of the smallest L value, which is never 0,
  # so this branch does not run as written; the intent looks like "smallest L is 0".
  # Left as-is because enabling it changes the background statistics - confirm.
  if(table(X_Background$l)[1] == 0){
    X_Background[X_Background$l == 0,]$l <- NA
    X_Background[X_Background$K == 0,]$K <- NA
    X_Background[X_Background$r == 0,]$r <- NA
    X_Background[X_Background$AUC == 0,]$AUC <- NA
  }

  X_Background <- X_Background[!is.na(X_Background$l),]

  #get summary stats for L, K, R, AUC
  # Per-parameter tables share the generic column names mean/sd/..., which the
  # later 2SD filter and Z-score code index as $mean and $sd.
  X_stats_BY_L <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise,
                        N = (length(l)),
                        mean = mean(l,na.rm=TRUE),
                        median = median(l,na.rm=TRUE),
                        max = max(l,na.rm=TRUE),
                        min = min(l,na.rm=TRUE),
                        sd = sd(l,na.rm=TRUE),
                        se = sd / sqrt(N-1)
  )
  print(X_stats_BY_L)
  X1_SD <- max(X_stats_BY_L$sd)

  X_stats_BY_K <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise,
                        N = (length(K)),
                        mean = mean(K,na.rm=TRUE),
                        median = median(K,na.rm=TRUE),
                        max = max(K,na.rm=TRUE),
                        min = min(K,na.rm=TRUE),
                        sd = sd(K,na.rm=TRUE),
                        se = sd / sqrt(N-1)
  )

  X1_SD_K <- max(X_stats_BY_K$sd)


  X_stats_BY_r <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise,
                        N = length(r),
                        mean = mean(r,na.rm=TRUE),
                        median = median(r,na.rm=TRUE),
                        max = max(r,na.rm=TRUE),
                        min = min(r,na.rm=TRUE),
                        sd = sd(r,na.rm=TRUE),
                        se = sd / sqrt(N-1)
  )

  X1_SD_r <- max(X_stats_BY_r$sd)

  X_stats_BY_AUC <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise,
                          N = length(AUC),
                          mean = mean(AUC,na.rm=TRUE),
                          median = median(AUC,na.rm=TRUE),
                          max = max(AUC,na.rm=TRUE),
                          min = min(AUC,na.rm=TRUE),
                          sd = sd(AUC,na.rm=TRUE),
                          se = sd / sqrt(N-1)
  )

  X1_SD_AUC <- max(X_stats_BY_AUC$sd)

  # Combined table written to disk. The AUC minimum is stored as min_AUC
  # (it was previously assigned to min_L, clobbering the L minimum).
  X_stats_BY <- ddply(X_Background, c("OrfRep","Conc_Num","Conc_Num_Factor"), summarise,
                      N = (length(l)),
                      mean_L = mean(l,na.rm=TRUE),
                      median_L = median(l,na.rm=TRUE),
                      max_L = max(l,na.rm=TRUE),
                      min_L = min(l,na.rm=TRUE),
                      sd_L = sd(l,na.rm=TRUE),
                      se_L = sd_L / sqrt(N-1),
                      mean_K = mean(K,na.rm=TRUE),
                      median_K = median(K,na.rm=TRUE),
                      max_K = max(K,na.rm=TRUE),
                      min_K = min(K,na.rm=TRUE),
                      sd_K = sd(K,na.rm=TRUE),
                      se_K = sd_K / sqrt(N-1),
                      mean_r = mean(r,na.rm=TRUE),
                      median_r = median(r,na.rm=TRUE),
                      max_r = max(r,na.rm=TRUE),
                      min_r = min(r,na.rm=TRUE),
                      sd_r = sd(r,na.rm=TRUE),
                      se_r = sd_r / sqrt(N-1),
                      mean_AUC = mean(AUC,na.rm=TRUE),
                      median_AUC = median(AUC,na.rm=TRUE),
                      max_AUC = max(AUC,na.rm=TRUE),
                      min_AUC = min(AUC,na.rm=TRUE),
                      sd_AUC = sd(AUC,na.rm=TRUE),
                      se_AUC = sd_AUC / sqrt(N-1)
  )

  # NOTE(review): same missing sep="" as the ALLSTRAINS file above - confirm before changing.
  write.csv(X_stats_BY,file=paste(outputpath,"SummaryStats_BackgroundStrains.csv"),row.names=FALSE)

  #calculate the max theoretical L values
  #only look for max values when K is within 2SD of the ref strain
  # q == 0 seeds X_within_2SD_K / X_outside_2SD_K; each q > 0 is appended with rbind.
  for(q in unique(X$Conc_Num_Factor)){
    if(q == 0){
      X_within_2SD_K <- X[X$Conc_Num_Factor == q,]
      X_within_2SD_K <- X_within_2SD_K[!is.na(X_within_2SD_K$l),]
      X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,]
      X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) ,]
      X_within_2SD_K <- X_within_2SD_K[X_within_2SD_K$K <= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,]

      X_outside_2SD_K <- X[X$Conc_Num_Factor == q,]
      X_outside_2SD_K <- X_outside_2SD_K[!is.na(X_outside_2SD_K$l),]
      #X_outside_2SD_K_Temp <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,]
      X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K <= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])) | X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,]
      #X_outside_2SD_K <- X_outside_2SD_K[X_outside_2SD_K$K >= (X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])) ,]

    }
    if(q > 0){
      X_within_2SD_K_temp <- X[X$Conc_Num_Factor == q,]
      X_within_2SD_K_temp <- X_within_2SD_K_temp[!is.na(X_within_2SD_K_temp$l),]
      X_stats_TEMP_K <- X_stats_BY_K[X_stats_BY_K$Conc_Num_Factor == q,]
      X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K >= (X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])),]
# Remainder of the q > 0 branch: finish the within-2SD filter for this
# concentration, build the complementary outside-2SD set, and append both.
      K_upper_2SD <- X_stats_TEMP_K$mean[1] + (2*X_stats_TEMP_K$sd[1])
      K_lower_2SD <- X_stats_TEMP_K$mean[1] - (2*X_stats_TEMP_K$sd[1])

      X_within_2SD_K_temp <- X_within_2SD_K_temp[X_within_2SD_K_temp$K <= K_upper_2SD,]
      X_within_2SD_K <- rbind(X_within_2SD_K,X_within_2SD_K_temp)

      # Outside set: spots at this concentration with a non-NA L whose K is at
      # or beyond either 2SD bound.
      X_outside_2SD_K_temp <- X[X$Conc_Num_Factor == q,]
      X_outside_2SD_K_temp <- X_outside_2SD_K_temp[!is.na(X_outside_2SD_K_temp$l),]
      K_is_outside <- X_outside_2SD_K_temp$K <= K_lower_2SD | X_outside_2SD_K_temp$K >= K_upper_2SD
      X_outside_2SD_K_temp <- X_outside_2SD_K_temp[K_is_outside,]
      X_outside_2SD_K <- rbind(X_outside_2SD_K,X_outside_2SD_K_temp)
    }
  }

  # L statistics per concentration for spots whose K lies within 2SD of the
  # background mean; z_max is the observed maximum expressed in SD units.
  X_stats_BY_L_within_2SD_K <- ddply(
    X_within_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise,
    N = (length(l)),
    mean = mean(l),
    median = median(l),
    max = max(l,na.rm=TRUE),
    min = min(l,na.rm=TRUE),
    sd = sd(l),
    se = sd / sqrt(N-1),
    z_max = (max-mean)/sd
  )
  print(X_stats_BY_L_within_2SD_K)
  X1_SD_within_2SD_K <- max(X_stats_BY_L_within_2SD_K$sd)
  write.csv(X_stats_BY_L_within_2SD_K,
            file=paste(outputpath_QC,"Max_Observed_L_Vals_for_spots_within_2SD_K.csv",sep=""),
            row.names=FALSE)

  # Same summary for the spots outside the 2SD band (printed, not written)
  X_stats_BY_L_outside_2SD_K <- ddply(
    X_outside_2SD_K, c("Conc_Num","Conc_Num_Factor"), summarise,
    N = (length(l)),
    mean = mean(l),
    median = median(l),
    max = max(l,na.rm=TRUE),
    min = min(l,na.rm=TRUE),
    sd = sd(l),
    se = sd / sqrt(N-1)
  )
  print(X_stats_BY_L_outside_2SD_K)
  X1_SD_outside_2SD_K <- max(X_stats_BY_L_outside_2SD_K$sd)

  # L vs K scatter of the outside-2SD spots (the expression continues on the next line)
  Outside_2SD_K_L_vs_K <- ggplot(data = X_outside_2SD_K, mapping = aes(l,K,color=as.factor(Conc_Num))) +
    geom_point(aes(ORF=ORF,Gene=Gene,Delta_Backgrd=Delta_Backgrd),shape=3) +
    ggtitle("Raw L vs K for strains falling outside 2SD of the K mean at each conc") +
theme_Publication_legend_right()
  pdf(paste(outputpath_QC,"Raw_L_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8)
  print(Outside_2SD_K_L_vs_K)
  dev.off()
  pgg <- ggplotly(Outside_2SD_K_L_vs_K)
  plotly_path <- paste(getwd(),"/",outputpath_QC,"RawL_vs_K_for_strains_outside_2SD_K.html",sep="")
  saveWidget(pgg, file=plotly_path, selfcontained =TRUE)


  Outside_2SD_K_delta_background_vs_K <- ggplot(X_outside_2SD_K,aes(Delta_Backgrd,K,color=as.factor(Conc_Num))) + geom_point(aes(l=l,ORF=ORF,Gene=Gene),shape=3,position="jitter") +
    ggtitle("DeltaBackground vs K for strains falling outside 2SD of the K mean at each conc") + theme_Publication_legend_right()
  pdf(paste(outputpath_QC,"DeltaBackground_vs_K_for_strains_2SD_outside_mean_K.pdf",sep=""),width = 10,height = 8)
  # Explicit print(): this code runs inside the for(s ...) loop, where a bare
  # plot object is not auto-printed and the PDF would be left without a page.
  print(Outside_2SD_K_delta_background_vs_K)
  dev.off()
  pgg <- ggplotly(Outside_2SD_K_delta_background_vs_K)
  #pgg
  plotly_path <- paste(getwd(),"/",outputpath_QC,"DeltaBackground_vs_K_for_strains_outside_2SD_K.html",sep="")
  saveWidget(pgg, file=plotly_path, selfcontained =TRUE)


  #get the background strain mean values at the no drug conc to calculate shift
  # Takes row 1 of each background table (presumably the no-drug row, given the comment above - confirm ordering).
  Background_L <- X_stats_BY_L$mean[1]
  Background_K <- X_stats_BY_K$mean[1]
  Background_r <- X_stats_BY_r$mean[1]
  Background_AUC <- X_stats_BY_AUC$mean[1]

  #create empty plots for plotting element
  p_l <- ggplot()
  p_K <- ggplot()
  p_r <- ggplot()
  p_AUC <- ggplot()

  p_rf_l <- ggplot()
  p_rf_K <- ggplot()
  p_rf_r <- ggplot()
  p_rf_AUC <- ggplot()

  #get only the deletion strains
  X2 <- X
  X2 <- X2[X2$OrfRep != "YDL227C",]

  #if set to max theoretical value, add a 1 to SM, if not, leave as 0
  #SM = Set to Max
  X2$SM <- 0
  #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min
  # The i-th unique concentration is paired with row i of X_stats_BY_L_within_2SD_K,
  # and X2_new is seeded by the Concentration == 0 pass; the "Check loop order"
  # prints exist so that pairing can be verified in the log.
  for(i in 1:length(unique(X2$Conc_Num))){
    Concentration <- unique(X2$Conc_Num)[i]
    X2_temp <- X2[X2$Conc_Num == Concentration,]
    if(Concentration == 0){
      X2_new <- X2_temp
      print(paste("Check loop order, conc =",Concentration,sep=" "))
    }
    if(Concentration > 0){
      # Order matters: no-growth spots (l == 0) are raised to the max first, so they
      # are also flagged SM by the >= test before everything is capped at the max.
      try(X2_temp[X2_temp$l == 0 & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i])
      try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$SM <- 1)
      try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i])

      #X2_temp[X2_temp$K == 0,]$K <- X_stats_ALL_K$max[i]
      #X2_temp[X2_temp$r == 0,]$r <- X_stats_ALL_r$max[i]
      #X2_temp[X2_temp$AUC == 0,]$AUC <- X_stats_ALL_AUC$max[i]
      print(paste("Check loop order, conc =",Concentration,sep=" "))

      X2_new <- rbind(X2_new,X2_temp)

    }
  }
  X2 <- X2_new


  #get only the RF strains
  X2_RF <- X
  X2_RF <- X2_RF[X2_RF$OrfRep == "YDL227C",]
  #if set to max theoretical value, add a 1 to SM, if not, leave as 0
  #SM = Set to Max
  X2_RF$SM <- 0
  #set the missing values to the highest theoretical value at each drug conc for L, leave other values as 0 for the max/min
  for(i in 1:length(unique(X2_RF$Conc_Num))){
    Concentration <- unique(X2_RF$Conc_Num)[i]
    X2_RF_temp <- X2_RF[X2_RF$Conc_Num == Concentration,]
    if(Concentration == 0){
      X2_RF_new <- X2_RF_temp
      print(paste("Check loop order, conc =",Concentration,sep=" "))
    }
    if(Concentration > 0){
      try(X2_RF_temp[X2_RF_temp$l == 0 & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i])
      # Flag the reference-strain rows themselves (this used to write to the stale
      # X2_temp left over from the deletion-strain loop, so X2_RF$SM stayed 0).
      try(X2_RF_temp[X2_RF_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_RF_temp$l),]$SM <- 1)
      try(X2_RF_temp[X2_RF_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_RF_temp$l),]$l <- X_stats_BY_L_within_2SD_K$max[i])
      print(paste("Check loop order, if error, refs have no L values outside theoretical max L, for REFs, conc =",Concentration,sep=" "))

      X2_RF_new <- rbind(X2_RF_new,X2_RF_temp)

    }
  }
  X2_RF <- X2_RF_new


  #######Part 4 Get the RF Z score values
  #Change the OrfRep Column to include the RF strain, the Gene name and the Num.
# so each RF gets its own score
  X2_RF$OrfRep <- paste(X2_RF$OrfRep,X2_RF$Gene,X2_RF$Num.,sep="_")

  num_genes_RF <- length(unique(X2_RF$OrfRep))
  print(num_genes_RF)


  # Output table: one row per reference replicate (unique OrfRep); every score
  # column starts as NA and is filled in by the loop below.
  InteractionScores_RF <- unique(X2_RF["OrfRep"])
  #InteractionScores_RF$Gene <- unique(X2$Gene)
  RF_score_columns <- c(
    "Gene",
    "Raw_Shift_L","Z_Shift_L","lm_Score_L","Z_lm_L","R_Squared_L","Sum_Z_Score_L","Avg_Zscore_L",
    "Raw_Shift_K","Z_Shift_K","lm_Score_K","Z_lm_K","R_Squared_K","Sum_Z_Score_K","Avg_Zscore_K",
    "Raw_Shift_r","Z_Shift_r","lm_Score_r","Z_lm_r","R_Squared_r","Sum_Z_Score_r","Avg_Zscore_r",
    "Raw_Shift_AUC","Z_Shift_AUC","lm_Score_AUC","Z_lm_AUC","R_Squared_AUC","Sum_Z_Score_AUC","Avg_Zscore_AUC",
    "NG","SM"
  )
  for(RF_score_column in RF_score_columns){
    InteractionScores_RF[[RF_score_column]] <- NA
  }


  for(i in 1:num_genes_RF){
    # Current reference replicate and its rows
    Gene_Sel <- unique(X2_RF$OrfRep)[i]
    X_Gene_Sel <- X2_RF[X2_RF$OrfRep == Gene_Sel,]

    # Per-concentration statistics for this replicate; NG/DB/SM are summed flags
    X_stats_interaction <- ddply(
      X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise,
      N = (length(l)),
      mean_L = mean(l,na.rm = TRUE),
      median_L = median(l,na.rm = TRUE),
      sd_L = sd(l,na.rm = TRUE),
      se_L = sd_L / sqrt(N-1),
      mean_K = mean(K,na.rm = TRUE),
      median_K = median(K,na.rm = TRUE),
      sd_K = sd(K,na.rm = TRUE),
      se_K = sd_K / sqrt(N-1),
      mean_r = mean(r,na.rm = TRUE),
      median_r = median(r,na.rm = TRUE),
      sd_r = sd(r,na.rm = TRUE),
      se_r = sd_r / sqrt(N-1),
      mean_AUC = mean(AUC,na.rm = TRUE),
      median_AUC = median(AUC,na.rm = TRUE),
      sd_AUC = sd(AUC,na.rm = TRUE),
      se_AUC = sd_AUC / sqrt(N-1),
      NG = sum(NG,na.rm=TRUE),
      DB= sum(DB,na.rm=TRUE),
      SM = sum(SM,na.rm=TRUE)
    )


    #Get shift vals
    #if L is 0, that means the no growth on no drug
    #if L is NA at 0, that means the spot was removed due to contamination
    #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift
    #otherwise calculate shift at no drug conc
    first_row_mean_L <- X_stats_interaction$mean_L[1]
    if(is.na(first_row_mean_L) || first_row_mean_L == 0){
      X_stats_interaction$Raw_Shift_L <- 0
      X_stats_interaction$Raw_Shift_K <- 0
      X_stats_interaction$Raw_Shift_r <- 0
      X_stats_interaction$Raw_Shift_AUC <- 0
      X_stats_interaction$Z_Shift_L <- 0
      X_stats_interaction$Z_Shift_K <- 0
      X_stats_interaction$Z_Shift_r <- 0
      X_stats_interaction$Z_Shift_AUC <- 0
    }else{
      # Raw shift: first-row mean minus the background mean
      X_stats_interaction$Raw_Shift_L <- first_row_mean_L - Background_L
      X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K
      X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r
      X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC
      # Z shift: raw shift scaled by the first-row background SD
      X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1]
      X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1]
      X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1]
      X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1]
    }


    # Background ("WT") mean and SD per concentration, copied alongside
    X_stats_interaction$WT_l <- X_stats_BY_L$mean
    X_stats_interaction$WT_K <- X_stats_BY_K$mean
    X_stats_interaction$WT_r <- X_stats_BY_r$mean
    X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean

    X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd
    X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd
    X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd
    X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd


    #only get scores if there's growth at no drug
    if(!is.na(first_row_mean_L) && first_row_mean_L != 0){

      # Expected value = background mean plus this replicate's shift
      X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L
      X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K
      X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r
      X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC

      # Delta = observed mean minus expected value
      X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L
      X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K
      X_stats_interaction$Delta_r <- X_stats_interaction$mean_r - X_stats_interaction$Exp_r
      X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC

      #disregard shift for no growth values in Z score calculation
      if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){
        NG_rows <- X_stats_interaction$NG == 1
        X_stats_interaction$Delta_L[NG_rows] <- X_stats_interaction$mean_L[NG_rows] - X_stats_interaction$WT_l[NG_rows]
        X_stats_interaction$Delta_K[NG_rows] <- X_stats_interaction$mean_K[NG_rows] - X_stats_interaction$WT_K[NG_rows]
        X_stats_interaction$Delta_r[NG_rows] <- X_stats_interaction$mean_r[NG_rows] - X_stats_interaction$WT_r[NG_rows]
        X_stats_interaction$Delta_AUC[NG_rows] <- X_stats_interaction$mean_AUC[NG_rows] - X_stats_interaction$WT_AUC[NG_rows]
      }
      #disregard shift for set to max values in Z score calculation
      #only calculate the L value without shift since L is the only adjusted value
      if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){
        SM_rows <- X_stats_interaction$SM == 1
        X_stats_interaction$Delta_L[SM_rows] <- X_stats_interaction$mean_L[SM_rows] - X_stats_interaction$WT_l[SM_rows]
      }

      #calculate Z score at each concentration
      X_stats_interaction$Zscore_L <- X_stats_interaction$Delta_L / X_stats_interaction$WT_sd_l
      X_stats_interaction$Zscore_K <- X_stats_interaction$Delta_K / X_stats_interaction$WT_sd_K
      X_stats_interaction$Zscore_r <- X_stats_interaction$Delta_r / X_stats_interaction$WT_sd_r
      X_stats_interaction$Zscore_AUC <- X_stats_interaction$Delta_AUC / X_stats_interaction$WT_sd_AUC

      # Linear fit of delta against Conc_Num_Factor, one model per parameter
      gene_lm_L <- lm(Delta_L ~ Conc_Num_Factor, data = X_stats_interaction)
      gene_lm_K <- lm(Delta_K ~ Conc_Num_Factor, data = X_stats_interaction)
      gene_lm_r <- lm(Delta_r ~ Conc_Num_Factor, data = X_stats_interaction)
      gene_lm_AUC <- lm(Delta_AUC ~ Conc_Num_Factor, data = X_stats_interaction)

      # Interaction score = fitted value at MAX_CONC (slope * MAX_CONC + intercept)
      gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1]
# Interaction score per parameter = fitted delta at MAX_CONC
# (slope * MAX_CONC + intercept), plus the R-squared of each fit.
      r_squared_l <- summary(gene_lm_L)$r.squared
      gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1]
      r_squared_K <- summary(gene_lm_K)$r.squared
      gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1]
      r_squared_r <- summary(gene_lm_r)$r.squared
      # Slope and intercept both come from the AUC model (the slope was
      # previously taken from gene_lm_r, mixing the r fit into the AUC score).
      gene_interaction_AUC <- MAX_CONC*(gene_lm_AUC$coefficients[2]) + gene_lm_AUC$coefficients[1]
      r_squared_AUC <- summary(gene_lm_AUC)$r.squared

      #Get total of non removed values
      Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1

      #report the scores
      # Row of the output table for this replicate. The OrfRep write below stores
      # the value the mask was built from, so the mask stays valid afterwards.
      RF_score_row <- InteractionScores_RF$OrfRep == Gene_Sel
      InteractionScores_RF$OrfRep[RF_score_row] <- X_Gene_Sel$OrfRep[1]
      InteractionScores_RF$Gene[RF_score_row] <- X_Gene_Sel$Gene[1]

      InteractionScores_RF$Raw_Shift_L[RF_score_row] <- X_stats_interaction$Raw_Shift_L[1]
      InteractionScores_RF$Z_Shift_L[RF_score_row] <- X_stats_interaction$Z_Shift_L[1]
      InteractionScores_RF$lm_Score_L[RF_score_row] <- gene_interaction_L
      InteractionScores_RF$R_Squared_L[RF_score_row] <- r_squared_l
      InteractionScores_RF$Sum_Z_Score_L[RF_score_row] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)
      InteractionScores_RF$Avg_Zscore_L[RF_score_row] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc)


      InteractionScores_RF$Raw_Shift_K[RF_score_row] <- X_stats_interaction$Raw_Shift_K[1]
      InteractionScores_RF$Z_Shift_K[RF_score_row] <- X_stats_interaction$Z_Shift_K[1]
      InteractionScores_RF$lm_Score_K[RF_score_row] <- gene_interaction_K
      InteractionScores_RF$R_Squared_K[RF_score_row] <- r_squared_K
      InteractionScores_RF$Sum_Z_Score_K[RF_score_row] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)
      InteractionScores_RF$Avg_Zscore_K[RF_score_row] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc)

      # NOTE(review): r and AUC average over Total_Conc_Nums-1 while L and K use
      # Num_non_Removed_Conc. Kept as written; confirm whether that is intended.
      InteractionScores_RF$Raw_Shift_r[RF_score_row] <- X_stats_interaction$Raw_Shift_r[1]
      InteractionScores_RF$Z_Shift_r[RF_score_row] <- X_stats_interaction$Z_Shift_r[1]
      InteractionScores_RF$lm_Score_r[RF_score_row] <- gene_interaction_r
      InteractionScores_RF$R_Squared_r[RF_score_row] <- r_squared_r
      InteractionScores_RF$Sum_Z_Score_r[RF_score_row] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)
      InteractionScores_RF$Avg_Zscore_r[RF_score_row] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1)

      InteractionScores_RF$Raw_Shift_AUC[RF_score_row] <- X_stats_interaction$Raw_Shift_AUC[1]
      InteractionScores_RF$Z_Shift_AUC[RF_score_row] <- X_stats_interaction$Z_Shift_AUC[1]
      InteractionScores_RF$lm_Score_AUC[RF_score_row] <- gene_interaction_AUC
      InteractionScores_RF$R_Squared_AUC[RF_score_row] <- r_squared_AUC
      InteractionScores_RF$Sum_Z_Score_AUC[RF_score_row] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)
      InteractionScores_RF$Avg_Zscore_AUC[RF_score_row] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1)
    }

    # No growth (or a removed spot) in the first row: the per-concentration
    # values are still computed in this branch.
    if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){

      #calculate expected values
      X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L
      X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K
      X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r
X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC + + #calculate normalized delta values + X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L + X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K + X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r + X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC + + #disregard shift for missing values in Z score calculatiom + if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){ + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC + } + #disregard shift for set to max values in Z score calculation + if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l + #only calculate the L value without shift since L is the only adjusted value + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - 
X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC + + } + + + #calculate Z score at each concentration + X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) + X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) + X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) + X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) + + #NA values for the next part since there's an NA or 0 at the no drug. + gene_lm_L <- NA + gene_lm_K <- NA + gene_lm_r <- NA + gene_lm_AUC <- NA + + gene_interaction_L <- NA + r_squared_l <- NA + gene_interaction_K <- NA + r_squared_K <- NA + gene_interaction_r <- NA + r_squared_r <- NA + gene_interaction_AUC <- NA + r_squared_AUC <- NA + + X_stats_interaction$Raw_Shift_L <- NA + X_stats_interaction$Raw_Shift_K <- NA + X_stats_interaction$Raw_Shift_r <- NA + X_stats_interaction$Raw_Shift_AUC <- NA + + X_stats_interaction$Z_Shift_L <- NA + X_stats_interaction$Z_Shift_K <- NA + X_stats_interaction$Z_Shift_r <- NA + X_stats_interaction$Z_Shift_AUC <- NA + + InteractionScores_RF$OrfRep[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$OrfRep[1] + InteractionScores_RF$Gene[InteractionScores_RF$OrfRep == Gene_Sel] <- X_Gene_Sel$Gene[1] + + InteractionScores_RF$Raw_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] + InteractionScores_RF$Z_Shift_L[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] + InteractionScores_RF$lm_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_L + InteractionScores_RF$R_Squared_L[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_l + 
InteractionScores_RF$Sum_Z_Score_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + InteractionScores_RF$Avg_Zscore_L[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + + InteractionScores_RF$Raw_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] + InteractionScores_RF$Z_Shift_K[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] + InteractionScores_RF$lm_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_K + InteractionScores_RF$R_Squared_K[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_K + InteractionScores_RF$Sum_Z_Score_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + InteractionScores_RF$Avg_Zscore_K[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + + InteractionScores_RF$Raw_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] + InteractionScores_RF$Z_Shift_r[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] + InteractionScores_RF$lm_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_r + InteractionScores_RF$R_Squared_r[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_r + InteractionScores_RF$Sum_Z_Score_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + InteractionScores_RF$Avg_Zscore_r[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + + InteractionScores_RF$Raw_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] + InteractionScores_RF$Z_Shift_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] + InteractionScores_RF$lm_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- gene_interaction_AUC + InteractionScores_RF$R_Squared_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- r_squared_AUC + InteractionScores_RF$Sum_Z_Score_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + InteractionScores_RF$Avg_Zscore_AUC[InteractionScores_RF$OrfRep == Gene_Sel] <- NA + } + + if(i == 1){ + X_stats_interaction_ALL_RF <- X_stats_interaction 
+ } + if(i > 1){ + X_stats_interaction_ALL_RF <- rbind(X_stats_interaction_ALL_RF,X_stats_interaction) + } + + InteractionScores_RF$NG[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE) + InteractionScores_RF$DB[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE) + InteractionScores_RF$SM[InteractionScores_RF$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE) + + #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int) + + + } + print("Pass RF Calculation loop") + + lm_sd_L <- sd(InteractionScores_RF$lm_Score_L,na.rm=TRUE) + lm_sd_K <- sd(InteractionScores_RF$lm_Score_K,na.rm=TRUE) + lm_sd_r <- sd(InteractionScores_RF$lm_Score_r,na.rm=TRUE) + lm_sd_AUC <- sd(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) + + lm_mean_L <- mean(InteractionScores_RF$lm_Score_L,na.rm=TRUE) + lm_mean_K <- mean(InteractionScores_RF$lm_Score_K,na.rm=TRUE) + lm_mean_r <- mean(InteractionScores_RF$lm_Score_r,na.rm=TRUE) + lm_mean_AUC <- mean(InteractionScores_RF$lm_Score_AUC,na.rm=TRUE) + + print(paste("Mean RF linear regression score L",lm_mean_L)) + + + InteractionScores_RF$Z_lm_L <- (InteractionScores_RF$lm_Score_L - lm_mean_L)/(lm_sd_L) + InteractionScores_RF$Z_lm_K <- (InteractionScores_RF$lm_Score_K - lm_mean_K)/(lm_sd_K) + InteractionScores_RF$Z_lm_r <- (InteractionScores_RF$lm_Score_r - lm_mean_r)/(lm_sd_r) + InteractionScores_RF$Z_lm_AUC <- (InteractionScores_RF$lm_Score_AUC - lm_mean_AUC)/(lm_sd_AUC) + + + InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$Z_lm_L,decreasing=TRUE),] + InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$NG,decreasing=TRUE),] + write.csv(InteractionScores_RF,paste(outputpath,"RF_ZScores_Interaction.csv",sep=""),row.names=FALSE) + + + for(i in 1:num_genes_RF){ + Gene_Sel <- unique(InteractionScores_RF$OrfRep)[i] + X_ZCalculations <- X_stats_interaction_ALL_RF[X_stats_interaction_ALL_RF$OrfRep == Gene_Sel,] + X_Int_Scores <- 
InteractionScores_RF[InteractionScores_RF$OrfRep == Gene_Sel,] + + #Delta_L vs concentration panel for the current reference strain, annotated with its shift, lm Z-score and NG/DB/SM counts + p_rf_l[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_L)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-65,65)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_l), ymax=0+(2*WT_sd_l)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_L,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=35,label = paste("Avg Zscore =",round(X_Int_Scores$Avg_Zscore_L,2))) + + annotate("text",x=1,y=25,label = paste("lm Zscore =",round(X_Int_Scores$Z_lm_L,2))) + + annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-45,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + + theme_Publication() + + #Delta_K panel, same layout as the L panel + p_rf_K[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_K)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-65,65)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_K), ymax=0+(2*WT_sd_K)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_K,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_K,2))) + + #the K panel reports the K lm Z-score (Z_lm_K), matching the L, r and AUC panels + annotate("text",x=1,y=25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_K,2))) + + annotate("text",x=1,y=-25,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-35,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-45,label = paste("SM 
=",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60)) + + theme_Publication() + + p_rf_r[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_r)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-0.65,0.65)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_r), ymax=0+(2*WT_sd_r)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=0.45,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_r,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=0.35,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_r,2))) + + annotate("text",x=1,y=0.25,label = paste("lm ZScore =",round(X_Int_Scores$Z_lm_r,2))) + + annotate("text",x=1,y=-0.25,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-0.35,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-0.45,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6)) + + theme_Publication() + + p_rf_AUC[[i]] <- ggplot(X_ZCalculations,aes(Conc_Num_Factor,Delta_AUC)) + geom_point() + geom_smooth(method="lm",formula=y~x,se=FALSE) + + coord_cartesian(ylim = c(-6500,6500)) + + geom_errorbar(aes(ymin=0-(2*WT_sd_AUC), ymax=0+(2*WT_sd_AUC)),alpha=0.3) + + ggtitle(paste(X_ZCalculations$OrfRep[1],X_ZCalculations$Gene[1],sep=" ")) + + annotate("text",x=1,y=4500,label = paste("ZShift =",round(X_Int_Scores$Z_Shift_AUC,2))) + scale_color_discrete(guide = FALSE) + + #annotate("text",x=1,y=3500,label = paste("Avg ZScore =",round(X_Int_Scores$Avg_Zscore_AUC,2))) + + annotate("text",x=1,y=2500,label = paste("lm 
ZScore =",round(X_Int_Scores$Z_lm_AUC,2))) + + annotate("text",x=1,y=-2500,label = paste("NG =",X_Int_Scores$NG)) + + annotate("text",x=1,y=-3500,label = paste("DB =",X_Int_Scores$DB)) + + annotate("text",x=1,y=-4500,label = paste("SM =",X_Int_Scores$SM)) + + scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X_ZCalculations$Conc_Num_Factor),labels = unique(as.character(X_ZCalculations$Conc_Num))) + + scale_y_continuous(breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000)) + + theme_Publication() + + + + if(i == 1){ + X_stats_interaction_ALL_RF_final <- X_ZCalculations + } + if(i > 1){ + X_stats_interaction_ALL_RF_final <- rbind(X_stats_interaction_ALL_RF_final,X_ZCalculations) + } + } + print("Pass RF ggplot loop") + write.csv(X_stats_interaction_ALL_RF_final,paste(outputpath,"RF_ZScore_Calculations.csv",sep=""),row.names = FALSE) + + + ####### Part 5 - Get Zscores for Gene deletion strains + + + + #get total number of genes for the next loop + num_genes <- length(unique(X2$OrfRep)) + print(num_genes) + + #create the output data.frame containing columns for each deletion strain + InteractionScores <- unique(X2["OrfRep"]) + #InteractionScores$Gene <- unique(X2$Gene) + InteractionScores$Gene <- NA + InteractionScores$Raw_Shift_L <- NA + InteractionScores$Z_Shift_L <- NA + InteractionScores$lm_Score_L <- NA + InteractionScores$Z_lm_L <- NA + InteractionScores$R_Squared_L <- NA + InteractionScores$Sum_Z_Score_L <- NA + InteractionScores$Avg_Zscore_L <- NA + InteractionScores$Raw_Shift_K <- NA + InteractionScores$Z_Shift_K <- NA + InteractionScores$lm_Score_K <- NA + InteractionScores$Z_lm_K <- NA + InteractionScores$R_Squared_K <- NA + InteractionScores$Sum_Z_Score_K <- NA + InteractionScores$Avg_Zscore_K <- NA + InteractionScores$Raw_Shift_r <- NA + InteractionScores$Z_Shift_r <- NA + InteractionScores$lm_Score_r <- NA + InteractionScores$Z_lm_r <- NA + InteractionScores$R_Squared_r <- NA + InteractionScores$Sum_Z_Score_r <- NA + 
InteractionScores$Avg_Zscore_r <- NA + InteractionScores$Raw_Shift_AUC <- NA + InteractionScores$Z_Shift_AUC <- NA + InteractionScores$lm_Score_AUC <- NA + InteractionScores$Z_lm_AUC <- NA + InteractionScores$R_Squared_AUC <- NA + InteractionScores$Sum_Z_Score_AUC <- NA + InteractionScores$Avg_Zscore_AUC <- NA + InteractionScores$NG <- NA + InteractionScores$DB <- NA + InteractionScores$SM <- NA + + for(i in 1:num_genes){ + #get each deletion strain ORF + Gene_Sel <- unique(X2$OrfRep)[i] + #extract only the current deletion strain and its data + X_Gene_Sel <- X2[X2$OrfRep == Gene_Sel,] + + X_stats_interaction <- ddply(X_Gene_Sel, c("OrfRep","Gene","Conc_Num","Conc_Num_Factor"), summarise, + N = (length(l)), + mean_L = mean(l,na.rm = TRUE), + median_L = median(l,na.rm = TRUE), + sd_L = sd(l,na.rm = TRUE), + se_L = sd_L / sqrt(N-1), + mean_K = mean(K,na.rm = TRUE), + median_K = median(K,na.rm = TRUE), + sd_K = sd(K,na.rm = TRUE), + se_K = sd_K / sqrt(N-1), + mean_r = mean(r,na.rm = TRUE), + median_r = median(r,na.rm = TRUE), + sd_r = sd(r,na.rm = TRUE), + se_r = sd_r / sqrt(N-1), + mean_AUC = mean(AUC,na.rm = TRUE), + median_AUC = median(AUC,na.rm = TRUE), + sd_AUC = sd(AUC,na.rm = TRUE), + se_AUC = sd_AUC / sqrt(N-1), + NG = sum(NG,na.rm=TRUE), + DB= sum(DB,na.rm=TRUE), + SM = sum(SM,na.rm=TRUE) + ) + + + #Get shift vals + #if L is 0, that means the no growth on no drug + #if L is NA at 0, that means the spot was removed due to contamination + #if L is 0, keep the shift at 0 and for other drug concs calculate delta Ls with no shift + #otherwise calculate shift at no drug conc + if(is.na(X_stats_interaction$mean_L[1]) | X_stats_interaction$mean_L[1] == 0 ){ + X_stats_interaction$Raw_Shift_L <- 0 + X_stats_interaction$Raw_Shift_K <- 0 + X_stats_interaction$Raw_Shift_r <- 0 + X_stats_interaction$Raw_Shift_AUC <- 0 + X_stats_interaction$Z_Shift_L <- 0 + X_stats_interaction$Z_Shift_K <- 0 + X_stats_interaction$Z_Shift_r <- 0 + X_stats_interaction$Z_Shift_AUC <- 0 + 
}else{ + X_stats_interaction$Raw_Shift_L <- X_stats_interaction$mean_L[1] - Background_L + X_stats_interaction$Raw_Shift_K <- X_stats_interaction$mean_K[1] - Background_K + X_stats_interaction$Raw_Shift_r <- X_stats_interaction$mean_r[1] - Background_r + X_stats_interaction$Raw_Shift_AUC <- X_stats_interaction$mean_AUC[1] - Background_AUC + X_stats_interaction$Z_Shift_L <- X_stats_interaction$Raw_Shift_L[1]/X_stats_BY_L$sd[1] + X_stats_interaction$Z_Shift_K <- X_stats_interaction$Raw_Shift_K[1]/X_stats_BY_K$sd[1] + X_stats_interaction$Z_Shift_r <- X_stats_interaction$Raw_Shift_r[1]/X_stats_BY_r$sd[1] + X_stats_interaction$Z_Shift_AUC <- X_stats_interaction$Raw_Shift_AUC[1]/X_stats_BY_AUC$sd[1] + } + + + #get WT vals + X_stats_interaction$WT_l <- X_stats_BY_L$mean + X_stats_interaction$WT_K <- X_stats_BY_K$mean + X_stats_interaction$WT_r <- X_stats_BY_r$mean + X_stats_interaction$WT_AUC <- X_stats_BY_AUC$mean + + #Get WT SD + X_stats_interaction$WT_sd_l <- X_stats_BY_L$sd + X_stats_interaction$WT_sd_K <- X_stats_BY_K$sd + X_stats_interaction$WT_sd_r <- X_stats_BY_r$sd + X_stats_interaction$WT_sd_AUC <- X_stats_BY_AUC$sd + + + #only get scores if there's growth at no drug + if(X_stats_interaction$mean_L[1] != 0 & !is.na(X_stats_interaction$mean_L[1])){ + + #calculate expected values + X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L + X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K + X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r + X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC + + #calculate normalized delta values + X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L + X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K + X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r + 
X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC + + #disregard shift for no growth values in Z score calculation + if(sum(X_stats_interaction$NG,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC + + } + #disregard shift for set to max values in Z score calculation + if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l + #only calculate the L value without shift since L is the only adjusted value + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC + + } + + + + #calculate Z score at each concentration + X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) + 
X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) + X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) + X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) + + #get linear model + gene_lm_L <- lm(formula = Delta_L ~ Conc_Num_Factor,data = X_stats_interaction) + gene_lm_K <- lm(formula = Delta_K ~ Conc_Num_Factor,data = X_stats_interaction) + gene_lm_r <- lm(formula = Delta_r ~ Conc_Num_Factor,data = X_stats_interaction) + gene_lm_AUC <- lm(formula = Delta_AUC ~ Conc_Num_Factor,data = X_stats_interaction) + + #get interaction score calculated by linear model and R-squared value for the fit + #score = fitted Delta at MAX_CONC, i.e. MAX_CONC*slope + intercept of the same fit + gene_interaction_L <- MAX_CONC*(gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1] + r_squared_l <- summary(gene_lm_L)$r.squared + gene_interaction_K <- MAX_CONC*(gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1] + r_squared_K <- summary(gene_lm_K)$r.squared + gene_interaction_r <- MAX_CONC*(gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1] + r_squared_r <- summary(gene_lm_r)$r.squared + #AUC score: slope and intercept must both come from the AUC fit (gene_lm_AUC) + gene_interaction_AUC <- MAX_CONC*(gene_lm_AUC$coefficients[2]) + gene_lm_AUC$coefficients[1] + r_squared_AUC <- summary(gene_lm_AUC)$r.squared + + #Get total of non removed values + Num_non_Removed_Conc <- Total_Conc_Nums - sum(X_stats_interaction$DB,na.rm = TRUE) - 1 + + #report the scores + InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1]) + InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1]) + + InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] + InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] + InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_L + InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- 
(gene_interaction_L-lm_mean_L)/lm_sd_L + InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l + InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE) + InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_L,na.rm = TRUE)/(Num_non_Removed_Conc) + + + InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] + InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] + InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_K + InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_K-lm_mean_K)/lm_sd_K + InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K + InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE) + InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_K,na.rm = TRUE)/(Num_non_Removed_Conc) + + InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_r[1] + InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] + InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_r + InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_r-lm_mean_r)/lm_sd_r + InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r + InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE) + InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_r,na.rm = TRUE)/(Total_Conc_Nums-1) + + InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- 
X_stats_interaction$Raw_Shift_AUC[1] + InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] + InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- gene_interaction_AUC + InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- (gene_interaction_AUC-lm_mean_AUC)/lm_sd_AUC + InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC + InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE) + InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$Zscore_AUC,na.rm = TRUE)/(Total_Conc_Nums-1) + } + + if(X_stats_interaction$mean_L[1] == 0 | is.na(X_stats_interaction$mean_L[1]) ){ + + #calculate expected values + X_stats_interaction$Exp_L <- X_stats_interaction$WT_l + X_stats_interaction$Raw_Shift_L + X_stats_interaction$Exp_K <- X_stats_interaction$WT_K + X_stats_interaction$Raw_Shift_K + X_stats_interaction$Exp_r <- X_stats_interaction$WT_r + X_stats_interaction$Raw_Shift_r + X_stats_interaction$Exp_AUC <- X_stats_interaction$WT_AUC + X_stats_interaction$Raw_Shift_AUC + + #calculate normalized delta values + X_stats_interaction$Delta_L <- X_stats_interaction$mean_L - X_stats_interaction$Exp_L + X_stats_interaction$Delta_K <- X_stats_interaction$mean_K - X_stats_interaction$Exp_K + X_stats_interaction$Delta_r <-X_stats_interaction$mean_r - X_stats_interaction$Exp_r + X_stats_interaction$Delta_AUC <- X_stats_interaction$mean_AUC - X_stats_interaction$Exp_AUC + + #disregard shift for missing values in Z score calculatiom + if(sum(X_stats_interaction$NG,na.rm = TRUE) > 0){ + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_L - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_l + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$NG == 
1,]$mean_K - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_K + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_r - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_r + X_stats_interaction[X_stats_interaction$NG == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$NG == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$NG == 1,]$WT_AUC + } + #disregard shift for set to max values in Z score calculation + if(sum(X_stats_interaction$SM,na.rm=TRUE) > 0){ + X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_L <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_L - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_l + #only calculate the L value without shift since L is the only adjusted value + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_K <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_K - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_K + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_r <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_r - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_r + #X_stats_interaction[X_stats_interaction$SM == 1,]$Delta_AUC <- X_stats_interaction[X_stats_interaction$SM == 1,]$mean_AUC - X_stats_interaction[X_stats_interaction$SM == 1,]$WT_AUC + + } + + + #calculate Z score at each concentration + X_stats_interaction$Zscore_L <- (X_stats_interaction$Delta_L)/(X_stats_interaction$WT_sd_l) + X_stats_interaction$Zscore_K <- (X_stats_interaction$Delta_K)/(X_stats_interaction$WT_sd_K) + X_stats_interaction$Zscore_r <- (X_stats_interaction$Delta_r)/(X_stats_interaction$WT_sd_r) + X_stats_interaction$Zscore_AUC <- (X_stats_interaction$Delta_AUC)/(X_stats_interaction$WT_sd_AUC) + + #NA values for the next part since there's an NA or 0 at the no drug. 
+ gene_lm_L <- NA + gene_lm_K <- NA + gene_lm_r <- NA + gene_lm_AUC <- NA + + gene_interaction_L <- NA + r_squared_l <- NA + gene_interaction_K <- NA + r_squared_K <- NA + gene_interaction_r <- NA + r_squared_r <- NA + #reset the AUC values too, otherwise the R_Squared_AUC reported for this strain is a leftover from an earlier fit + gene_interaction_AUC <- NA + r_squared_AUC <- NA + + X_stats_interaction$Raw_Shift_L <- NA + X_stats_interaction$Raw_Shift_K <- NA + X_stats_interaction$Raw_Shift_r <- NA + X_stats_interaction$Raw_Shift_AUC <- NA + + X_stats_interaction$Z_Shift_L <- NA + X_stats_interaction$Z_Shift_K <- NA + X_stats_interaction$Z_Shift_r <- NA + X_stats_interaction$Z_Shift_AUC <- NA + + InteractionScores$OrfRep[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$OrfRep[1]) + InteractionScores$Gene[InteractionScores$OrfRep == Gene_Sel] <- as.character(X_Gene_Sel$Gene[1]) + + InteractionScores$Raw_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_L[1] + InteractionScores$Z_Shift_L[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_L[1] + InteractionScores$lm_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Z_lm_L[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$R_Squared_L[InteractionScores$OrfRep == Gene_Sel] <- r_squared_l + InteractionScores$Sum_Z_Score_L[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Avg_Zscore_L[InteractionScores$OrfRep == Gene_Sel] <- NA + + InteractionScores$Raw_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_K[1] + InteractionScores$Z_Shift_K[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_K[1] + InteractionScores$lm_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Z_lm_K[InteractionScores$OrfRep == Gene_Sel] <-NA + InteractionScores$R_Squared_K[InteractionScores$OrfRep == Gene_Sel] <- r_squared_K + InteractionScores$Sum_Z_Score_K[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Avg_Zscore_K[InteractionScores$OrfRep == Gene_Sel] <- NA + + InteractionScores$Raw_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- 
X_stats_interaction$Raw_Shift_r[1] + InteractionScores$Z_Shift_r[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_r[1] + InteractionScores$lm_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Z_lm_r[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$R_Squared_r[InteractionScores$OrfRep == Gene_Sel] <- r_squared_r + InteractionScores$Sum_Z_Score_r[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Avg_Zscore_r[InteractionScores$OrfRep == Gene_Sel] <- NA + + InteractionScores$Raw_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Raw_Shift_AUC[1] + InteractionScores$Z_Shift_AUC[InteractionScores$OrfRep == Gene_Sel] <- X_stats_interaction$Z_Shift_AUC[1] + InteractionScores$lm_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Z_lm_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$R_Squared_AUC[InteractionScores$OrfRep == Gene_Sel] <- r_squared_AUC + InteractionScores$Sum_Z_Score_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA + InteractionScores$Avg_Zscore_AUC[InteractionScores$OrfRep == Gene_Sel] <- NA + } + + if(i == 1){ + X_stats_interaction_ALL <- X_stats_interaction + } + if(i > 1){ + X_stats_interaction_ALL <- rbind(X_stats_interaction_ALL,X_stats_interaction) + } + + InteractionScores$NG[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$NG,na.rm = TRUE) + InteractionScores$DB[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$DB,na.rm = TRUE) + InteractionScores$SM[InteractionScores$OrfRep == Gene_Sel] <- sum(X_stats_interaction$SM,na.rm = TRUE) + + #X_stats_L_int_temp <- rbind(X_stats_L_int_temp,X_stats_L_int) + + + } + print("Pass Int Calculation loop") + InteractionScores <- InteractionScores[order(InteractionScores$Z_lm_L,decreasing=TRUE),] + InteractionScores <- InteractionScores[order(InteractionScores$NG,decreasing=TRUE),] + df_order_by_OrfRep <- unique(InteractionScores$OrfRep) + 
# Export the complete interaction-score table, then build the hit lists that are
# defined by the average Z score (threshold of 2 in either direction) for L and K,
# including the sets where the lm Z score and the average Z score disagree in sign.
#X_stats_interaction_ALL <- X_stats_interaction_ALL[order(X_stats_interaction_ALL$NG,decreasing=TRUE),]
write.csv(InteractionScores, paste0(outputpath, "ZScores_Interaction.csv"), row.names = FALSE)

# Return the rows of InteractionScores selected by the logical vector `keep`.
# A comparison against a missing score is NA and indexes an all-NA row; those
# rows are dropped afterwards by requiring a non-missing OrfRep.
select_hits <- function(keep) {
  picked <- InteractionScores[keep, ]
  picked[!is.na(picked$OrfRep), ]
}

avg_z_L <- InteractionScores$Avg_Zscore_L
avg_z_K <- InteractionScores$Avg_Zscore_K
lm_z_L <- InteractionScores$Z_lm_L
lm_z_K <- InteractionScores$Z_lm_K

# For L a high score marks an enhancer; for K the sign convention is inverted.
InteractionScores_deletion_enhancers_L <- select_hits(avg_z_L >= 2)
InteractionScores_deletion_enhancers_K <- select_hits(avg_z_K <= -2)
InteractionScores_deletion_suppressors_L <- select_hits(avg_z_L <= -2)
InteractionScores_deletion_suppressors_K <- select_hits(avg_z_K >= 2)
InteractionScores_deletion_enhancers_and_Suppressors_L <- select_hits(avg_z_L >= 2 | avg_z_L <= -2)
InteractionScores_deletion_enhancers_and_Suppressors_K <- select_hits(avg_z_K >= 2 | avg_z_K <= -2)

# Genes whose lm-based call and average-Z call point in opposite directions.
InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L <- select_hits(lm_z_L >= 2 & avg_z_L <= -2)
InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L <- select_hits(lm_z_L <= -2 & avg_z_L >= 2)
InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K <- select_hits(lm_z_K <= -2 & avg_z_K >= 2)
InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K <- select_hits(lm_z_K >= 2 & avg_z_K <= -2)

# First four exports; the remaining tables are written further below.
write.csv(InteractionScores_deletion_enhancers_L, paste0(outputpath, "ZScores_Interaction_DeletionEnhancers_L.csv"), row.names = FALSE)
write.csv(InteractionScores_deletion_enhancers_K, paste0(outputpath, "ZScores_Interaction_DeletionEnhancers_K.csv"), row.names = FALSE)
write.csv(InteractionScores_deletion_suppressors_L, paste0(outputpath, "ZScores_Interaction_DeletionSuppressors_L.csv"), row.names = FALSE)
write.csv(InteractionScores_deletion_suppressors_K, paste0(outputpath, "ZScores_Interaction_DeletionSuppressors_K.csv"), row.names = FALSE)
# ---------------------------------------------------------------------------
# Remaining hit-table exports, lm-based hit tables, per-gene interaction panels
# (InteractionPlots.pdf) and the reference (RF) overview plots that open
# RF_InteractionPlots.pdf.
# ---------------------------------------------------------------------------

# Write one table below `outputpath` without row names.
write_hits <- function(tbl, file_name) {
  write.csv(tbl, paste(outputpath, file_name, sep = ""), row.names = FALSE)
}

write_hits(InteractionScores_deletion_enhancers_and_Suppressors_L, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv")
write_hits(InteractionScores_deletion_enhancers_and_Suppressors_K, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv")
write_hits(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L, "ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv")
write_hits(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L, "ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv")
write_hits(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K, "ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv")
write_hits(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K, "ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv")

#get enhancers and suppressors for linear regression
# Rows selected by `keep`; all-NA rows produced by NA comparisons are removed
# by requiring a non-missing OrfRep.
lm_hits <- function(keep) {
  picked <- InteractionScores[keep, ]
  picked[!is.na(picked$OrfRep), ]
}

InteractionScores_deletion_enhancers_L_lm <- lm_hits(InteractionScores$Z_lm_L >= 2)
InteractionScores_deletion_enhancers_K_lm <- lm_hits(InteractionScores$Z_lm_K <= -2)
InteractionScores_deletion_suppressors_L_lm <- lm_hits(InteractionScores$Z_lm_L <= -2)
InteractionScores_deletion_suppressors_K_lm <- lm_hits(InteractionScores$Z_lm_K >= 2)
InteractionScores_deletion_enhancers_and_Suppressors_L_lm <- lm_hits(InteractionScores$Z_lm_L >= 2 | InteractionScores$Z_lm_L <= -2)
InteractionScores_deletion_enhancers_and_Suppressors_K_lm <- lm_hits(InteractionScores$Z_lm_K >= 2 | InteractionScores$Z_lm_K <= -2)

write_hits(InteractionScores_deletion_enhancers_L_lm, "ZScores_Interaction_DeletionEnhancers_L_lm.csv")
write_hits(InteractionScores_deletion_enhancers_K_lm, "ZScores_Interaction_DeletionEnhancers_K_lm.csv")
write_hits(InteractionScores_deletion_suppressors_L_lm, "ZScores_Interaction_DeletionSuppressors_L_lm.csv")
write_hits(InteractionScores_deletion_suppressors_K_lm, "ZScores_Interaction_DeletionSuppressors_K_lm.csv")
write_hits(InteractionScores_deletion_enhancers_and_Suppressors_L_lm, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv")
write_hits(InteractionScores_deletion_enhancers_and_Suppressors_K_lm, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv")


write.csv(Labels, file = paste("../Code/StudyInfo.csv"), row.names = FALSE)
print('ln 1570 write StudyInfo.csv after ')
#write.table(Labels,file=paste("../Code/StudyInfo.txt"),sep="\t",row.names = FALSE)

# Build one gene's "delta vs. concentration" panel for a single growth parameter.
#   calc     rows of X_stats_interaction_ALL belonging to the gene
#   scores   the gene's row(s) of InteractionScores (source of NG / DB / SM)
#   xy       aes() mapping for the points and the lm smoother
#   bars     aes() mapping (ymin / ymax) for the error bars drawn around zero
#   z_shift  value shown in the "ZShift =" label
#   z_lm     value shown in the "Z lm Score =" label
#   y_lim    y range handed to coord_cartesian()
#   label_y  y positions of the ZShift, Z lm Score, NG, DB and SM labels (in that order)
#   y_breaks tick positions of the y axis
delta_panel <- function(calc, scores, xy, bars, z_shift, z_lm, y_lim, label_y, y_breaks) {
  ggplot(calc, xy) + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
    coord_cartesian(ylim = y_lim) +
    geom_errorbar(bars, alpha = 0.3) +
    ggtitle(paste(calc$OrfRep[1], calc$Gene[1], sep = " ")) +
    annotate("text", x = 1, y = label_y[1], label = paste("ZShift =", round(z_shift, 2))) +
    scale_color_discrete(guide = FALSE) +
    # An "Avg ZScore =" label was commented out in the original at this position.
    annotate("text", x = 1, y = label_y[2], label = paste("Z lm Score =", round(z_lm, 2))) +
    annotate("text", x = 1, y = label_y[3], label = paste("NG =", scores$NG)) +
    annotate("text", x = 1, y = label_y[4], label = paste("DB =", scores$DB)) +
    annotate("text", x = 1, y = label_y[5], label = paste("SM =", scores$SM)) +
    scale_x_continuous(name = unique(X$Drug[1]), breaks = unique(calc$Conc_Num_Factor), labels = unique(as.character(calc$Conc_Num))) +
    scale_y_continuous(breaks = y_breaks) +
    theme_Publication()
}

# Genes are plotted in the current row order of InteractionScores.
gene_order <- unique(InteractionScores$OrfRep)
gene_calcs <- vector("list", num_genes)
# seq_len() instead of 1:num_genes so that an empty gene set runs zero iterations.
for(i in seq_len(num_genes)){
  Gene_Sel <- gene_order[i]
  X_ZCalculations <- X_stats_interaction_ALL[X_stats_interaction_ALL$OrfRep == Gene_Sel,]
  X_Int_Scores <- InteractionScores[InteractionScores$OrfRep == Gene_Sel,]

  p_l[[i]] <- delta_panel(X_ZCalculations, X_Int_Scores,
                          aes(Conc_Num_Factor, Delta_L),
                          aes(ymin = 0-(2*WT_sd_l), ymax = 0+(2*WT_sd_l)),
                          X_Int_Scores$Z_Shift_L, X_Int_Scores$Z_lm_L,
                          y_lim = c(-65,65), label_y = c(45,25,-25,-35,-45),
                          y_breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60))

  p_K[[i]] <- delta_panel(X_ZCalculations, X_Int_Scores,
                          aes(Conc_Num_Factor, Delta_K),
                          aes(ymin = 0-(2*WT_sd_K), ymax = 0+(2*WT_sd_K)),
                          X_Int_Scores$Z_Shift_K, X_Int_Scores$Z_lm_K,
                          y_lim = c(-65,65), label_y = c(45,25,-25,-35,-45),
                          y_breaks = c(-60,-50,-40,-30,-20,-10,0,10,20,30,40,50,60))

  p_r[[i]] <- delta_panel(X_ZCalculations, X_Int_Scores,
                          aes(Conc_Num_Factor, Delta_r),
                          aes(ymin = 0-(2*WT_sd_r), ymax = 0+(2*WT_sd_r)),
                          X_Int_Scores$Z_Shift_r, X_Int_Scores$Z_lm_r,
                          y_lim = c(-0.65,0.65), label_y = c(0.45,0.25,-0.25,-0.35,-0.45),
                          y_breaks = c(-0.6,-0.4,-0.2,0,0.2,0.4,0.6))

  p_AUC[[i]] <- delta_panel(X_ZCalculations, X_Int_Scores,
                            aes(Conc_Num_Factor, Delta_AUC),
                            aes(ymin = 0-(2*WT_sd_AUC), ymax = 0+(2*WT_sd_AUC)),
                            X_Int_Scores$Z_Shift_AUC, X_Int_Scores$Z_lm_AUC,
                            y_lim = c(-6500,6500), label_y = c(4500,2500,-2500,-3500,-4500),
                            y_breaks = c(-6000,-5000,-4000,-3000,-2000,-1000,0,1000,2000,3000,4000,5000,6000))

  gene_calcs[[i]] <- X_ZCalculations
}
# Stack the per-gene calculation rows once instead of growing a data frame with
# rbind() on every iteration.
X_stats_interaction_ALL_final <- do.call(rbind, gene_calcs)
print("Pass Int ggplot loop")
write.csv(X_stats_interaction_ALL_final, paste(outputpath,"ZScore_Calculations.csv",sep=""), row.names = FALSE)


# Empty panel used to pad incomplete pages.
Blank <- ggplot(X2_RF) + geom_blank()

# Per-concentration summary of the reference data. X2_RF is not modified in this
# section, so the summary is computed once and reused for both PDFs.
X_stats_X2_RF <- ddply(X2_RF, c("Conc_Num","Conc_Num_Factor"), summarise,
                       mean_L = mean(l,na.rm=TRUE),
                       median_L = median(l,na.rm=TRUE),
                       max_L = max(l,na.rm=TRUE),
                       min_L = min(l,na.rm=TRUE),
                       sd_L = sd(l,na.rm=TRUE),
                       mean_K = mean(K,na.rm=TRUE),
                       median_K = median(K,na.rm=TRUE),
                       max_K = max(K,na.rm=TRUE),
                       min_K = min(K,na.rm=TRUE),
                       sd_K = sd(K,na.rm=TRUE),
                       mean_r = mean(r,na.rm=TRUE),
                       median_r = median(r,na.rm=TRUE),
                       max_r = max(r,na.rm=TRUE),
                       min_r = min(r,na.rm=TRUE),
                       sd_r = sd(r,na.rm=TRUE),
                       mean_AUC = mean(AUC,na.rm=TRUE),
                       median_AUC = median(AUC,na.rm=TRUE),
                       max_AUC = max(AUC,na.rm=TRUE),
                       min_AUC = min(AUC,na.rm=TRUE),
                       sd_AUC = sd(AUC,na.rm=TRUE),
                       NG = sum(NG,na.rm=TRUE),
                       DB = sum(DB,na.rm=TRUE),
                       SM = sum(SM,na.rm=TRUE)
)

# Jittered scatter of one reference parameter per concentration, with mean +/- SD
# in red and the per-concentration NG / DB / SM counts printed at `count_y`
# (three y positions, in that order).
rf_scatter <- function(xy, title, y_lim, count_y) {
  conc_x <- c(unique(X2_RF$Conc_Num_Factor))
  ggplot(X2_RF, xy) + geom_point(position="jitter",size=1) +
    stat_summary(fun.y = mean, fun.ymin = function(x) mean(x) - sd(x), fun.ymax = function(x) mean(x) + sd(x),
                 geom = "errorbar",color="red") +
    stat_summary(fun.y = mean, geom = "point",color="red") +
    scale_x_continuous(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
    ggtitle(paste(s, title, sep = " ")) + coord_cartesian(ylim = y_lim) +
    annotate("text",x=-0.25,y=count_y[1],label="NG") +
    annotate("text",x=-0.25,y=count_y[2],label="DB") +
    annotate("text",x=-0.25,y=count_y[3],label="SM") +
    annotate("text",x=conc_x,y=count_y[1],label=X_stats_X2_RF$NG) +
    annotate("text",x=conc_x,y=count_y[2],label=X_stats_X2_RF$DB) +
    annotate("text",x=conc_x,y=count_y[3],label=X_stats_X2_RF$SM) +
    theme_Publication()
}

# Box plot of one reference parameter per concentration.
rf_box <- function(xy, title, y_lim) {
  ggplot(X2_RF, xy) + geom_boxplot() +
    scale_x_discrete(name = unique(X$Drug[1]),breaks = unique(X2_RF$Conc_Num_Factor),labels = as.character(unique(X2_RF$Conc_Num))) +
    ggtitle(paste(s, title, sep = " ")) + coord_cartesian(ylim = y_lim) +
    theme_Publication()
}

# Print gene panels three genes per page (one row per gene: L, K, r, AUC) and pad
# the last page with `filler`. Replaces the former index-table loop, whose
# `1:(round(n/3)-1)` range counted down through 0 and failed for small gene sets.
arrange_gene_pages <- function(n_genes, l_plots, K_plots, r_plots, AUC_plots, filler) {
  for (first in seq(1, by = 3, length.out = ceiling(n_genes / 3))) {
    panels <- list()
    for (g in first:min(first + 2, n_genes)) {
      panels <- c(panels, list(l_plots[[g]], K_plots[[g]], r_plots[[g]], AUC_plots[[g]]))
    }
    while (length(panels) < 12) {
      panels <- c(panels, list(filler))
    }
    do.call(grid.arrange, c(panels, list(ncol = 4, nrow = 3)))
  }
}

pdf(paste(outputpath,"InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE)

L_Stats <- rf_scatter(aes(Conc_Num_Factor,l), "Scatter RF for L with SD", c(0,160), c(10,5,0))
K_Stats <- rf_scatter(aes(Conc_Num_Factor,K), "Scatter RF for K with SD", c(-20,160), c(-5,-12.5,-20))
R_Stats <- rf_scatter(aes(Conc_Num_Factor,r), "Scatter RF for r with SD", c(0,1), c(.9,.8,.7))
AUC_Stats <- rf_scatter(aes(Conc_Num_Factor,AUC), "Scatter RF for AUC with SD", c(0,12500), c(11000,10000,9000))

L_Stats_Box <- rf_box(aes(as.factor(Conc_Num_Factor),l), "Scatter RF for L with SD", c(0,160))
K_Stats_Box <- rf_box(aes(as.factor(Conc_Num_Factor),K), "Scatter RF for K with SD", c(0,130))
r_Stats_Box <- rf_box(aes(as.factor(Conc_Num_Factor),r), "Scatter RF for r with SD", c(0,1))
AUC_Stats_Box <- rf_box(aes(as.factor(Conc_Num_Factor),AUC), "Scatter RF for AUC with SD", c(0,12500))

grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2)
grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2)

#plot the references
#grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4)
#grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2)

# Gene pages, 4 columns x 3 rows.
arrange_gene_pages(num_genes, p_l, p_K, p_r, p_AUC, Blank)
dev.off()



pdf(paste(outputpath,"RF_InteractionPlots.pdf",sep=""),width = 16, height = 16, onefile = TRUE)

# Same overview plots with the y ranges used for the reference PDF.
L_Stats <- rf_scatter(aes(Conc_Num_Factor,l), "Scatter RF for L with SD", c(0,130), c(10,5,0))
K_Stats <- rf_scatter(aes(Conc_Num_Factor,K), "Scatter RF for K with SD", c(-20,160), c(-5,-12.5,-20))
R_Stats <- rf_scatter(aes(Conc_Num_Factor,r), "Scatter RF for r with SD", c(0,1), c(.9,.8,.7))
AUC_Stats <- rf_scatter(aes(Conc_Num_Factor,AUC), "Scatter RF for AUC with SD", c(0,12500), c(11000,10000,9000))

L_Stats_Box <- rf_box(aes(as.factor(Conc_Num_Factor),l), "Scatter RF for L with SD", c(0,130))
K_Stats_Box <- rf_box(aes(as.factor(Conc_Num_Factor),K), "Scatter RF for K with SD", c(0,160))
r_Stats_Box <- rf_box(aes(as.factor(Conc_Num_Factor),r), "Scatter RF for r with SD", c(0,1))
# NOTE(review): these limits are reversed (12000 down to 0) and differ from the
# c(0,12500) used for every other AUC plot - confirm whether this is intended.
AUC_Stats_Box <- rf_box(aes(as.factor(Conc_Num_Factor),AUC), "Scatter RF for AUC with SD", c(12000,0))

grid.arrange(L_Stats,K_Stats,R_Stats,AUC_Stats,ncol=2,nrow=2)
grid.arrange(L_Stats_Box,K_Stats_Box,r_Stats_Box,AUC_Stats_Box,ncol=2,nrow=2)



#plot the references
#grid.arrange(p3,p3_K,p3_r,p4,p4_K,p4_r,p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=4)
#grid.arrange(p5,p5_K,p5_r,p6,p6_K,p6_r,ncol=3,nrow=2)

# Reference gene pages, 4 columns (L, K, r, AUC) x 3 rows (genes) per page.
# Pages are generated from the start index of each group of three genes and the
# last page is padded with Blank panels. This replaces the former index-table
# loop, whose `1:(round(num_genes_RF/3)-1)` range counted down through 0 and
# failed for small gene sets, plus its hand-written remainder cases.
for (page_start in seq(1, by = 3, length.out = ceiling(num_genes_RF / 3))) {
  page_panels <- list()
  for (g in page_start:min(page_start + 2, num_genes_RF)) {
    page_panels <- c(page_panels, list(p_rf_l[[g]], p_rf_K[[g]], p_rf_r[[g]], p_rf_AUC[[g]]))
  }
  while (length(page_panels) < 12) {
    page_panels <- c(page_panels, list(Blank))
  }
  do.call(grid.arrange, c(page_panels, list(ncol = 4, nrow = 3)))
}
dev.off()

#print rank plots for L and K gene interactions


# Work on a copy in which missing average Z scores are replaced by 0.001 so that
# every gene receives a rank.
InteractionScores_AdjustMissing <- InteractionScores
for (score_col in c("Avg_Zscore_L", "Avg_Zscore_K", "Avg_Zscore_r", "Avg_Zscore_AUC")) {
  # Index the column, not the data frame rows: `df[mask,]$col <- value` stops with
  # "replacement has 1 row, data has 0" when the column contains no NA.
  is_missing <- is.na(InteractionScores_AdjustMissing[[score_col]])
  InteractionScores_AdjustMissing[[score_col]][is_missing] <- 0.001
}

# Rank columns are appended in the order L, K, r, AUC.
InteractionScores_AdjustMissing$L_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_L)
InteractionScores_AdjustMissing$K_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_K)
InteractionScores_AdjustMissing$r_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_r)
InteractionScores_AdjustMissing$AUC_Rank <- rank(InteractionScores_AdjustMissing$Avg_Zscore_AUC)

#
+  # Placeholder-and-rank treatment for the interaction (lm) Z-scores: missing
+  # values become 0.001 and each column is ranked.
+  # The placeholder is written through the column vector because
+  # `df[is.na(col), ]$col <- 0.001` stops with "replacement has 1 row, data
+  # has 0" when a column has no NA. Columns are added in the order
+  # L_Rank_lm, K_Rank_lm, r_Rank_lm, AUC_Rank_lm.
+  for (param in c("L", "K", "r", "AUC")) {
+    z_col <- paste0("Z_lm_", param)
+    z <- InteractionScores_AdjustMissing[[z_col]]
+    z[is.na(z)] <- 0.001
+    InteractionScores_AdjustMissing[[z_col]] <- z
+    InteractionScores_AdjustMissing[[paste0(param, "_Rank_lm")]] <- rank(z)
+  }
+
+  # Build one "Z score vs. rank" plot.
+  #
+  # df                  data frame holding the rank and score columns
+  # rank_col, z_col     names (strings) of the x (rank) and y (score) columns
+  # sd_cut              threshold; bands are shaded above +sd_cut (purple) and
+  #                     below -sd_cut (orange) and lines drawn at +/- sd_cut
+  # title, y_label      plot title and y-axis label (x-axis is always "Rank")
+  # enhancers_positive  TRUE: scores >= sd_cut are counted as "Deletion
+  #                     Enhancers" (labelled at y = 10) and scores <= -sd_cut as
+  #                     "Deletion Suppressors" (y = -10); FALSE swaps the two
+  # with_counts         add the two count labels when TRUE
+  #
+  # Returns the ggplot object with theme_Publication() applied.
+  build_rank_plot <- function(df, rank_col, z_col, sd_cut, title, y_label,
+                              enhancers_positive = TRUE, with_counts = TRUE) {
+    force(rank_col)
+    force(z_col)
+    p <- ggplot(df, aes(x = .data[[rank_col]], y = .data[[z_col]])) +
+      ggtitle(title) + xlab("Rank") + ylab(y_label) +
+      annotate("rect", xmin = -Inf, xmax = Inf, ymin = sd_cut, ymax = Inf, fill = "#542788", alpha = 0.3) +
+      annotate("rect", xmin = -Inf, xmax = Inf, ymin = -sd_cut, ymax = -Inf, fill = "orange", alpha = 0.3) +
+      geom_hline(yintercept = c(-sd_cut, sd_cut)) +
+      geom_point(size = 0.1, shape = 3)
+    if (with_counts) {
+      scores <- df[[z_col]]
+      n_high <- sum(scores >= sd_cut, na.rm = TRUE)
+      n_low <- sum(scores <= -sd_cut, na.rm = TRUE)
+      mid_x <- nrow(df) / 2
+      if (enhancers_positive) {
+        enh_y <- 10
+        enh_n <- n_high
+        sup_y <- -10
+        sup_n <- n_low
+      } else {
+        enh_y <- -10
+        enh_n <- n_low
+        sup_y <- 10
+        sup_n <- n_high
+      }
+      p <- p +
+        annotate("text", x = mid_x, y = enh_y, label = paste("Deletion Enhancers =", enh_n)) +
+        annotate("text", x = mid_x, y = sup_y, label = paste("Deletion Suppressors =", sup_n))
+    }
+    p + theme_Publication()
+  }
+
+  # Average-Z-score flavour: x = <param>_Rank, y = Avg_Zscore_<param>.
+  # For L a high score is labelled an enhancer; for K the labels are swapped.
+  avg_rank_plot <- function(param, sd_cut, with_counts = TRUE) {
+    build_rank_plot(InteractionScores_AdjustMissing,
+                    rank_col = paste0(param, "_Rank"),
+                    z_col = paste0("Avg_Zscore_", param),
+                    sd_cut = sd_cut,
+                    title = paste0("Average Z score vs. Rank for ", param, " above ", sd_cut, "SD"),
+                    y_label = paste("Avg Z score", param),
+                    enhancers_positive = (param == "L"),
+                    with_counts = with_counts)
+  }
+
+  # Interaction (lm) flavour: x = <param>_Rank_lm, y = Z_lm_<param>.
+  lm_rank_plot <- function(param, sd_cut, with_counts = TRUE) {
+    build_rank_plot(InteractionScores_AdjustMissing,
+                    rank_col = paste0(param, "_Rank_lm"),
+                    z_col = paste0("Z_lm_", param),
+                    sd_cut = sd_cut,
+                    title = paste0("Interaction Z score vs. Rank for ", param, " above ", sd_cut, "SD"),
+                    y_label = paste("Int Z score", param),
+                    enhancers_positive = (param == "L"),
+                    with_counts = with_counts)
+  }
+
+  Rank_L_1SD <- avg_rank_plot("L", 1)
+  Rank_L_2SD <- avg_rank_plot("L", 2)
+  Rank_L_3SD <- avg_rank_plot("L", 3)
+  Rank_K_1SD <- avg_rank_plot("K", 1)
+  Rank_K_2SD <- avg_rank_plot("K", 2)
+  Rank_K_3SD <- avg_rank_plot("K", 3)
+
+  Rank_L_1SD_notext <- avg_rank_plot("L", 1, with_counts = FALSE)
+  Rank_L_2SD_notext <- avg_rank_plot("L", 2, with_counts = FALSE)
+  Rank_L_3SD_notext <- avg_rank_plot("L", 3, with_counts = FALSE)
+  Rank_K_1SD_notext <- avg_rank_plot("K", 1, with_counts = FALSE)
+  Rank_K_2SD_notext <- avg_rank_plot("K", 2, with_counts = FALSE)
+  Rank_K_3SD_notext <- avg_rank_plot("K", 3, with_counts = FALSE)
+
+  pdf(paste(outputpath,"RankPlots.pdf",sep=""),width = 18, height = 12, onefile = TRUE)
+  grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2)
+  grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2)
+  dev.off()
+
+  Rank_L_1SD_lm <- lm_rank_plot("L", 1)
+  Rank_L_2SD_lm <- lm_rank_plot("L", 2)
+  Rank_L_3SD_lm <- lm_rank_plot("L", 3)
+  Rank_K_1SD_lm <- lm_rank_plot("K", 1)
+  Rank_K_2SD_lm <- lm_rank_plot("K", 2)
+  Rank_K_3SD_lm <- lm_rank_plot("K", 3)
+
+  Rank_L_1SD_notext_lm <- lm_rank_plot("L", 1, with_counts = FALSE)
+  Rank_L_2SD_notext_lm <- lm_rank_plot("L", 2, with_counts = FALSE)
+  Rank_L_3SD_notext_lm <- lm_rank_plot("L", 3, with_counts = FALSE)
+  Rank_K_1SD_notext_lm <- lm_rank_plot("K", 1, with_counts = FALSE)
+  Rank_K_2SD_notext_lm <- lm_rank_plot("K", 2, with_counts = FALSE)
+  Rank_K_3SD_notext_lm <- lm_rank_plot("K", 3, with_counts = FALSE)
+
+  pdf(paste(outputpath,"RankPlots_lm.pdf",sep=""),width = 18, height = 12, onefile = TRUE)
+  grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2)
+  grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2)
+  dev.off()
+
+  # Keep rows that have an lm Z-score OR an average Z-score for L (rows missing
+  # both are dropped; rows missing only one of the two are retained).
+  X_NArm <- InteractionScores[!is.na(InteractionScores$Z_lm_L) | !is.na(InteractionScores$Avg_Zscore_L) ,]
+
+  # Classify each row by how the two L scores agree at the +/- 2 threshold.
+  # Rules are applied in order, later rules overwrite earlier ones.
+  #
+  # Changes versus the previous try(X_NArm[cond, ]$Overlap <- ...) form:
+  #  * which() drops NA comparisons. Before, a single NA in `cond` made the
+  #    data-frame assignment fail and try() hid the error, so the label was
+  #    never applied to any row. Rows with one score missing now simply keep
+  #    whatever label they already have (normally "No Effect").
+  #  * The "only" categories use strict bounds (< 2, > -2), so a score of
+  #    exactly +/- 2 no longer overwrites a "Both" label.
+  #  * An empty selection is a no-op instead of a swallowed error.
+  overlap_z_lm <- X_NArm$Z_lm_L
+  overlap_z_avg <- X_NArm$Avg_Zscore_L
+  overlap_label <- rep("No Effect", nrow(X_NArm))
+  overlap_label[which(overlap_z_lm >= 2 & overlap_z_avg >= 2)] <- "Deletion Enhancer Both"
+  overlap_label[which(overlap_z_lm <= -2 & overlap_z_avg <= -2)] <- "Deletion Suppressor Both"
+  overlap_label[which(overlap_z_lm >= 2 & overlap_z_avg < 2)] <- "Deletion Enhancer lm only"
+  overlap_label[which(overlap_z_lm < 2 & overlap_z_avg >= 2)] <- "Deletion Enhancer Avg Zscore only"
+  overlap_label[which(overlap_z_lm <= -2 & overlap_z_avg > -2)] <- "Deletion Suppressor lm only"
+  overlap_label[which(overlap_z_lm > -2 & overlap_z_avg <= -2)] <- "Deletion Suppressor Avg Zscore only"
+  overlap_label[which(overlap_z_lm >= 2 & overlap_z_avg <= -2)] <- "Deletion Enhancer lm, Deletion Suppressor Avg Z score"
+  overlap_label[which(overlap_z_lm <= -2 & overlap_z_avg >= 2)] <- "Deletion Suppressor lm, Deletion Enhancer Avg Z score"
+  X_NArm$Overlap <- overlap_label
+
+  # Linear fits of the lm Z-score against the average Z-score; the summaries
+  # supply the R-squared values printed on the comparison plots.
+  get_lm_L <- lm(X_NArm$Z_lm_L~X_NArm$Avg_Zscore_L)
+  L_lm <- summary(get_lm_L)
+
+  get_lm_K <- lm(X_NArm$Z_lm_K~X_NArm$Avg_Zscore_K)
+ K_lm <- summary(get_lm_K) + + get_lm_r <- lm(X_NArm$Z_lm_r~X_NArm$Avg_Zscore_r) + r_lm <- summary(get_lm_r) + + get_lm_AUC <- lm(X_NArm$Z_lm_AUC~X_NArm$Avg_Zscore_AUC) + AUC_lm <- summary(get_lm_AUC) + + pdf(paste(outputpath,"Avg_Zscore_vs_lm_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) + + print(ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Avg Zscore vs lm L") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(Avg_Zscore_K,Z_lm_K)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Avg Zscore vs lm K") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(K_lm$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(Avg_Zscore_r,Z_lm_r)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Avg Zscore vs lm r") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(r_lm$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(Avg_Zscore_AUC,Z_lm_AUC)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Avg Zscore vs lm AUC") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(AUC_lm$r.squared,2))) + theme_Publication_legend_right()) + + dev.off() + + + lm_v_Zscore_L <- 
ggplot(X_NArm,aes(Avg_Zscore_L,Z_lm_L,ORF=OrfRep,Gene=Gene,NG=NG,SM=SM,DB=DB)) + geom_point(aes(color=Overlap),shape=3) + + geom_smooth(method = "lm",color=1) + ggtitle("Avg Zscore vs lm L") + + geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm$r.squared,2))) + theme_Publication_legend_right() + + pgg <- ggplotly(lm_v_Zscore_L) + #pgg + plotly_path <- paste(getwd(),"/",outputpath,"Avg_Zscore_vs_lm_NA_rm.html",sep="") + saveWidget(pgg, file=plotly_path, selfcontained =TRUE) + + X_NArm$L_Rank <- rank(X_NArm$Avg_Zscore_L) + X_NArm$K_Rank <- rank(X_NArm$Avg_Zscore_K) + X_NArm$r_Rank <- rank(X_NArm$Avg_Zscore_r) + X_NArm$AUC_Rank <- rank(X_NArm$Avg_Zscore_AUC) + + X_NArm$L_Rank_lm <- rank(X_NArm$Z_lm_L) + X_NArm$K_Rank_lm <- rank(X_NArm$Z_lm_K) + X_NArm$r_Rank_lm <- rank(X_NArm$Z_lm_r) + X_NArm$AUC_Rank_lm <- rank(X_NArm$Z_lm_AUC) + + #get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 + get_lm_L2 <- lm(X_NArm$L_Rank_lm~X_NArm$L_Rank) + L_lm2 <- summary(get_lm_L2) + + get_lm_K2 <- lm(X_NArm$K_Rank_lm~X_NArm$K_Rank) + K_lm2 <- summary(get_lm_K2) + + get_lm_r2 <- lm(X_NArm$r_Rank_lm~X_NArm$r_Rank) + r_lm2 <- summary(get_lm_r2) + + get_lm_AUC2 <- lm(X_NArm$AUC_Rank_lm~X_NArm$AUC_Rank) + AUC_lm2 <- summary(get_lm_AUC2) + + num_genes_NArm2 <- (dim(X_NArm)[1])/2 + + pdf(paste(outputpath,"Avg_Zscore_vs_lm_ranked_NA_rm.pdf",sep=""),width = 16, height = 12, onefile = TRUE) + + print(ggplot(X_NArm,aes(L_Rank,L_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Rank Avg Zscore vs lm L") + + annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(L_lm2$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(K_Rank,K_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + 
ggtitle("Rank Avg Zscore vs lm K") + + annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(K_lm2$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(r_Rank,r_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Rank Avg Zscore vs lm r") + + annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(r_lm2$r.squared,2))) + theme_Publication_legend_right()) + + print(ggplot(X_NArm,aes(AUC_Rank,AUC_Rank_lm)) + geom_point(aes(color=Overlap),shape=3) + geom_smooth(method = "lm",color=1) + + ggtitle("Rank of Avg Zscore vs lm AUC") + + annotate("text",x=num_genes_NArm2,y=num_genes_NArm2,label = paste("R-squared = ",round(AUC_lm2$r.squared,2))) + theme_Publication_legend_right()) + + + + dev.off() + + + + Rank_L_1SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 1,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -1,])[1])) + + theme_Publication() + + Rank_L_2SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. 
Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 2,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -2,])[1])) + + theme_Publication() + + Rank_L_3SD <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_L >= 3,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_L <= -3,])[1])) + + theme_Publication() + + + Rank_K_1SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. 
Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -1,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 1,])[1])) + + theme_Publication() + + Rank_K_2SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -2,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 2,])[1])) + + theme_Publication() + + Rank_K_3SD <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Avg_Zscore_K <= -3,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Avg_Zscore_K >= 3,])[1])) + + theme_Publication() + + + Rank_L_1SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_L_2SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_L_3SD_notext <- ggplot(X_NArm,aes(L_Rank,Avg_Zscore_L)) + + ggtitle("Average Z score vs. 
Rank for L above 3SD") + xlab("Rank") + ylab("Avg Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + + Rank_K_1SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_K_2SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_K_3SD_notext <- ggplot(X_NArm,aes(K_Rank,Avg_Zscore_K)) + + ggtitle("Average Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Avg Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + + pdf(paste(outputpath,"RankPlots_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) + + grid.arrange(Rank_L_1SD,Rank_L_2SD,Rank_L_3SD,Rank_K_1SD,Rank_K_2SD,Rank_K_3SD,ncol=3,nrow=2) + grid.arrange(Rank_L_1SD_notext,Rank_L_2SD_notext,Rank_L_3SD_notext,Rank_K_1SD_notext,Rank_K_2SD_notext,Rank_K_3SD_notext,ncol=3,nrow=2) + + dev.off() + + + Rank_L_1SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 1,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -1,])[1])) + + theme_Publication() + + Rank_L_2SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. 
Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 2,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -2,])[1])) + + theme_Publication() + + Rank_L_3SD_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_L >= 3,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_L <= -3,])[1])) + + theme_Publication() + + + Rank_K_1SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. 
Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -1,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 1,])[1])) + + theme_Publication() + + Rank_K_2SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -2,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 2,])[1])) + + theme_Publication() + + Rank_K_3SD_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + annotate("text",x=(dim(X_NArm)[1]/2),y=-10,label=paste("Deletion Enhancers =",dim(X_NArm[X_NArm$Z_lm_K <= -3,])[1])) + + annotate("text",x=(dim(X_NArm)[1]/2),y=10,label=paste("Deletion Suppressors =",dim(X_NArm[X_NArm$Z_lm_K >= 3,])[1])) + + theme_Publication() + + + Rank_L_1SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. Rank for L above 1SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_L_2SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. Rank for L above 2SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_L_3SD_notext_lm <- ggplot(X_NArm,aes(L_Rank_lm,Z_lm_L)) + + ggtitle("Interaction Z score vs. 
Rank for L above 3SD") + xlab("Rank") + ylab("Int Z score L") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + + Rank_K_1SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. Rank for K above 1SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (1),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-1),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-1,1)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_K_2SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. Rank for K above 2SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (2),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-2),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-2,2)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + Rank_K_3SD_notext_lm <- ggplot(X_NArm,aes(K_Rank_lm,Z_lm_K)) + + ggtitle("Interaction Z score vs. 
Rank for K above 3SD") + xlab("Rank") + ylab("Int Z score K") + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (3),ymax = Inf,fill="#542788", alpha=0.3) + + annotate("rect",xmin = -Inf, xmax = Inf, ymin = (-3),ymax = -Inf,fill="orange", alpha=0.3) + + geom_hline(yintercept=c(-3,3)) + geom_point(size=0.1,shape=3) + + theme_Publication() + + pdf(paste(outputpath,"RankPlots_lm_naRM.pdf",sep=""),width = 18, height = 12, onefile = TRUE) + + grid.arrange(Rank_L_1SD_lm,Rank_L_2SD_lm,Rank_L_3SD_lm,Rank_K_1SD_lm,Rank_K_2SD_lm,Rank_K_3SD_lm,ncol=3,nrow=2) + grid.arrange(Rank_L_1SD_notext_lm,Rank_L_2SD_notext_lm,Rank_L_3SD_notext_lm,Rank_K_1SD_notext_lm,Rank_K_2SD_notext_lm,Rank_K_3SD_notext_lm,ncol=3,nrow=2) + + dev.off() + +} + + + +#get the linear model info and the r-squared value for all CPPs in results 1 vs results 2 +get_lm_1 <- lm(X_NArm$Z_lm_K~X_NArm$Z_lm_L) +L_lm_1 <- summary(get_lm_1) + +get_lm_2 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_L) +L_lm_2 <- summary(get_lm_2) + +get_lm_3 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_L) +L_lm_3 <- summary(get_lm_3) + +get_lm_4 <- lm(X_NArm$Z_lm_r~X_NArm$Z_lm_K) +L_lm_4 <- summary(get_lm_4) + +get_lm_5 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_K) +L_lm_5 <- summary(get_lm_5) + +get_lm_6 <- lm(X_NArm$Z_lm_AUC~X_NArm$Z_lm_r) +L_lm_6 <- summary(get_lm_6) + + +pdf(file=paste(outputpath,"Correlation_CPPs.pdf",sep=""),width = 10, height = 7, onefile = TRUE) + +ggplot(X_NArm,aes(Z_lm_L,Z_lm_K)) + geom_point(shape=3,color="gray70") + + geom_smooth(method="lm",color="tomato3") + + ggtitle("Interaction L vs. 
Interaction K") + + xlab("z-score L") + ylab("z-score K") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_1$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_L,Z_lm_r)) + geom_point(shape=3,color="gray70") + + geom_smooth(method="lm",color="tomato3") + + ggtitle("Interaction L vs. Interaction r") + + xlab("z-score L") + ylab("z-score r") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_2$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_L,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + + geom_smooth(method="lm",color="tomato3") + + ggtitle("Interaction L vs. Interaction AUC") + + xlab("z-score L") + ylab("z-score AUC") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_3$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_K,Z_lm_r)) + geom_point(shape=3,color="gray70") + + geom_smooth(method="lm",color="tomato3") + + ggtitle("Interaction K vs. 
Interaction r") + + xlab("z-score K") + ylab("z-score r") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_4$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_K,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + + geom_smooth(method="lm",color="tomato3") + + ggtitle("Interaction K vs. Interaction AUC") + + xlab("z-score K") + ylab("z-score AUC") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_5$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_r,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + + geom_smooth(method="lm",color="tomato3") + + ggtitle("Interaction r vs. Interaction AUC") + + xlab("z-score r") + ylab("z-score AUC") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_6$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +InteractionScores_RF2 <- InteractionScores_RF[!is.na(InteractionScores_RF$Z_lm_L),] +ggplot(X_NArm,aes(Z_lm_L,Z_lm_K)) + geom_point(shape=3,color="gray70") + + geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_K),color="cyan") + + ggtitle("Interaction L vs. 
Interaction K") + + xlab("z-score L") + ylab("z-score K") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_1$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_L,Z_lm_r)) + geom_point(shape=3,color="gray70") + + geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_r),color="cyan") + + ggtitle("Interaction L vs. Interaction r") + + xlab("z-score L") + ylab("z-score r") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_2$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_L,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + + geom_point(data=InteractionScores_RF2,aes(Z_lm_L,Z_lm_AUC),color="cyan") + + ggtitle("Interaction L vs. Interaction AUC") + + xlab("z-score L") + ylab("z-score AUC") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_3$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_K,Z_lm_r)) + geom_point(shape=3,color="gray70") + + geom_point(data=InteractionScores_RF2,aes(Z_lm_K,Z_lm_r),color="cyan") + + ggtitle("Interaction K vs. 
Interaction r") + + xlab("z-score K") + ylab("z-score r") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_4$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_K,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + + geom_point(data=InteractionScores_RF2,aes(Z_lm_K,Z_lm_AUC),color="cyan") + + ggtitle("Interaction K vs. Interaction AUC") + + xlab("z-score K") + ylab("z-score AUC") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_5$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + +ggplot(X_NArm,aes(Z_lm_r,Z_lm_AUC)) + geom_point(shape=3,color="gray70") + + geom_point(data=InteractionScores_RF2,aes(Z_lm_r,Z_lm_AUC),color="cyan") + + ggtitle("Interaction r vs. Interaction AUC") + + xlab("z-score r") + ylab("z-score AUC") + + annotate("text",x=0,y=0,label = paste("R-squared = ",round(L_lm_6$r.squared,3))) + + theme_Publication_legend_right() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size=16),axis.title.x = element_text(size=18), axis.text.y = element_text(size=16),axis.title.y = element_text(size=18)) + + + + + +dev.off() + + +#write.csv(Labels,file=paste("../Code/Parameters.csv"),row.names = FALSE) +timestamp() + +#BoneYard*********************************************** +#I'm thinking this parameter needs to be save somewhere "permanent' for the record so outputs can be reproduced. +#take this out of the Arguments. In Matlab I could for future in .mat file. 
Maybe I could save the SD Args[2] as part of the StudyInfo.txt.
+#Corruptible but better than nothing.
+#if(is.na(Args[2])){
+# std=3
+#}else {
+# std= Arg[2]
+#Delta_Background_sdFactor <- 2 #Args[3]
+#DelBGFactr <- as.numeric(Delta_Background_sdFactor)
+#}
+
diff --git a/workflow/.old/templates/qhtcp/REMcJar2.sh b/workflow/.old/templates/qhtcp/REMcJar2.sh
new file mode 100644
index 00000000..01c6d248
--- /dev/null
+++ b/workflow/.old/templates/qhtcp/REMcJar2.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+# REMcJar2.sh: run the REMc Java utility (ExecMain from jingyuJava_1_7_extractLib.jar)
+# on REMcRdy_lm_only.csv; start it in the directory that holds the jar and its inputs.
+# REMcMaster2.sh and REMcMaster3.sh test for REMcRdy_lm_only.csv-finalTable.csv after
+# this script returns, so a table left by an earlier run is renamed out of the way first.
+# The exit status is java's.
+
+# Earlier invocations, kept for reference:
+#cp REMcRdy_lm_only.csv-finalTable.csv REMcRdy_lm_only.csv-finalTableBUP.csv
+#rm REMcRdy_lm_only.csv-finalTable.csv
+#userpath= sudo -u root -H -s eval 'echo $PWD'
+#/mnt/data/java-1.8.0-openjdk-1.8.0.352.b08-2.el7_9.x86_64/jre/bin/java -Xms512m -Xmx2048m -Dfile.encoding=UTF-8 -classpath ./jingyuJava_1_7_extractLib.jar ExecMain ./REMcRdy_lm_only.csv GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab ORF_List_Without_DAmPs.txt 1 true true
+
+#/Users/anyamcdaniel/Downloads/HartmanLab/StudiesQHTCP/REMcJava/java-1.8.0-openjdk-1.8.0.342.b07-1.el7_9.x86_64/jre/bin/java -Xms512m -Xmx2048m -Dfile.encoding=UTF-8 -classpath ./jingyuJava_1_7_extractLib.jar ExecMain ./REMcRdy_lm_only.csv GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab ORF_List_Without_DAmPs.txt 1 true true
+# figure out how to do this on a mac based on where REMcJava is located
+#/Users/anyamcdaniel/Downloads/HartmanLab/StudiesQHTCP/2PE_BMH21_RM_TrialOnMac/REMc/REMcJava/java-1.8.0-openjdk-1.8.0.342.b07-1.el7_9.x86_64/jre/bin/java -Xms512m -Xmx2048m -Dfile.encoding=UTF-8 -classpath ./jingyuJava_1_7_extractLib.jar ExecMain ./REMcRdy_lm_only.csv GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab ORF_List_Without_DAmPs.txt 1 true true
+
+#/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.302.b08-0.el7_9.x86_64/jre/bin/java -Xms512m -Xmx2048m -Dfile.encoding=UTF-8 -classpath ./jingyuJava_1_7_extractLib.jar ExecMain ./REMcRdy_lm_only.csv GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab ORF_List_Without_DAmPs.txt 1 true true
+
+#wait
+# Rename instead of cp + rm: nothing is attempted (and nothing is printed) when no
+# earlier table exists, and the old table cannot survive a failed rm.
+if [ -f REMcRdy_lm_only.csv-finalTable.csv ]; then
+  mv -f REMcRdy_lm_only.csv-finalTable.csv REMcRdy_lm_only.csv-finalTableBUP.csv
+fi
+
+current_dir="$PWD"
+
+java_executable="java" # Assuming java is available in the system's PATH
+
+classpath="$current_dir/jingyuJava_1_7_extractLib.jar"
+
+# Keep java as the last command: a trailing "wait" would reset the exit status to 0.
+"$java_executable" -Xms512m -Xmx2048m -Dfile.encoding=UTF-8 -classpath "$classpath" ExecMain "$current_dir/REMcRdy_lm_only.csv" "$current_dir/GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab" "$current_dir/ORF_List_Without_DAmPs.txt" 1 true true
diff --git a/workflow/.old/templates/qhtcp/REMcJar2old.sh b/workflow/.old/templates/qhtcp/REMcJar2old.sh
new file mode 100644
index 00000000..e3117b36
--- /dev/null
+++ b/workflow/.old/templates/qhtcp/REMcJar2old.sh
@@ -0,0 +1,10 @@
+
+cp REMcRdy_lm_only.csv-finalTable.csv REMcRdy_lm_only.csv-finalTableBUP.csv
+rm REMcRdy_lm_only.csv-finalTable.csv
+/mnt/data/REMcJava/java-1.8.0-openjdk-1.8.0.352.b08-2.el7_9.x86_64/jre/bin/java -Xms512m -Xmx2048m -Dfile.encoding=UTF-8 -classpath ./jingyuJava_1_7_extractLib.jar ExecMain ./REMcRdy_lm_only.csv GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab ORF_List_Without_DAmPs.txt 1 true true
+
+#/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.342.b07-1.el7_9.x86_64/jre/bin/java -Xms512m -Xmx2048m -Dfile.encoding=UTF-8 -classpath ./jingyuJava_1_7_extractLib.jar ExecMain ./REMcRdy_lm_only.csv GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab ORF_List_Without_DAmPs.txt 1 true true
+
+#/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.302.b08-0.el7_9.x86_64/jre/bin/java -Xms512m -Xmx2048m -Dfile.encoding=UTF-8 -classpath ./jingyuJava_1_7_extractLib.jar ExecMain ./REMcRdy_lm_only.csv GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab ORF_List_Without_DAmPs.txt 1 true true
+
+wait
diff --git a/workflow/.old/templates/qhtcp/REMcMaster2.sh b/workflow/.old/templates/qhtcp/REMcMaster2.sh
new file mode 100644
index 00000000..e6ccdd52
--- /dev/null
+++ b/workflow/.old/templates/qhtcp/REMcMaster2.sh
@@ -0,0 +1,92 @@
+#REMcMaster.sh Is to perform
all REMc task and to operate from source directory REMc
+#Step 0 Perform JoinInteractExps3dev.R to create the "REMcRdy_lm_only.csv" and "Shift_only.csv" files
+Rscript JoinInteractExps3dev.R
+
+#Step1 Perform REMc java utility using file input file REMcRdy_lm_only.csv
+#and producing REMcRdy_lm_only.csv-finalTable.csv
+sh ./REMcJar2.sh
+wait #wait isn't holding the process until the Jar is complete. Should take at least 3 minutes
+
+#Step 2
+if [ -f "REMcRdy_lm_only.csv-finalTable.csv" ]; then
+  echo "REMcRdy_lm_only.csv-finalTable.csv exists."
+  #Jarflag= 1
+  #echo "Jarflag = $Jarflag"
+  echo "REMcJar... .sh completed"
+else
+  echo "REMcJar failed"
+  wait
+  exit 1 # non-zero so a caller can tell the run was aborted
+fi
+echo "start Step2"
+#Step2 If REMcJar successfully produces "REMcRdy_lm_only.csv-finalTable.csv"
+#then add shift values back to the "REMcRdy_lm_only.csv-finalTable.csv"
+#and output it as "REMcWithShift.csv" to be used to produce the REMc Heatmaps.
+if [ -f "REMcRdy_lm_only.csv-finalTable.csv" ]; then
+  echo "REMcRdy_lm_only.csv-finalTable.csv exists."
+  Rscript ./AddShiftVals2.R
+  wait
+  echo "AddShiftVals2.R executed"
+  wait
+fi
+
+#Step3 Execute REMcHeatmaps_zscores.R contingent upon "REMcWithShift.csv" being created.
+# Test by path rather than cd-ing in and out: a failed cd followed by "cd .." would leave the rest of the script one directory too high.
+if [ -f "REMcHeatmaps/REMcWithShift.csv" ]; then
+  echo "REMcWithShift.csv exists."
+fi
+# NOTE(review): the commands below run even when that file is absent - confirm whether they should be skipped.
+Rscript REMcHeatmaps_zscores.R
+wait
+echo "REMcHeatmaps executed"
+wait
+pdftk *.pdf output compiledREMcHeatmaps.pdf
+wait
+
+#Step4 Begin the process of GTF contingent upon "REMcRdy_lm_only.csv-finalTable.csv"
+# being produced by "REMcJar.sh" (Step1)
+if [ -f "REMcRdy_lm_only.csv-finalTable.csv" ]; then
+  echo "REMcRdy_lm_only.csv-finalTable.csv exists."
+  python2 DconJG2.py REMcRdy_lm_only.csv-finalTable.csv GTF/Process/
+  #python2 DconJG.py REMcRdy_lm_only.csv-finalTable.csv 12 GTF/Process/
+else
+  echo "DconJG.py failed"
+  wait
+  exit 1
+
+fi
+
+if [ -f "GTF/Process/REMcRdy_lm_only/1-0-0-finaltable.csv" ]; then
+  echo "Begin copying REMcRdy_lm_only to /Function and /Component"
+  cp -r GTF/Process/REMcRdy_lm_only GTF/Function/
+  echo "Copy to Function complete"
+  wait
+  cp -r GTF/Process/REMcRdy_lm_only GTF/Component/
+  echo "Copy to Component complete"
+
+  wait
+
+else
+  echo "COPY failed"
+  wait
+  exit 1
+
+fi
+
+if [ -f "GTF/Function/REMcRdy_lm_only/1-0-0-finaltable.csv" ]; then
+  echo "Begin concurrent running of GTF tasks"
+  sh ./mProcess.sh &
+  sh ./mFunction.sh &
+  sh ./mComponent.sh &
+
+  echo "GTF ontologies run in background"
+  wait # block until the three background GTF jobs have exited
+else
+  echo "GTFontologies failed"
+
+  exit 1
+fi
+
+# Kept outside the if/else so the closing message is reached when no check above bailed out.
+echo "masterDev.sh Finished"
+exit 0
diff --git a/workflow/.old/templates/qhtcp/REMcMaster3.sh b/workflow/.old/templates/qhtcp/REMcMaster3.sh
new file mode 100644
index 00000000..68a82f3c
--- /dev/null
+++ b/workflow/.old/templates/qhtcp/REMcMaster3.sh
@@ -0,0 +1,121 @@
+#REMcMaster.sh Is to perform all REMc task and to operate from source directory REMc
+# Gain permissions to the perl scripts for GTF
+currentDir="$PWD"
+GTFpath="$currentDir/GTF"
+chmod -R u+rwx "$GTFpath"
+#Step 0 Perform JoinInteractExps3dev.R to create the "REMcRdy_lm_only.csv" and "Shift_only.csv" files
+Rscript JoinInteractExps3dev.R
+
+#Step1 Perform REMc java utility using file input file REMcRdy_lm_only.csv
+#and producing REMcRdy_lm_only.csv-finalTable.csv
+sh ./REMcJar2.sh
+wait
+#wait isn't holding the process until the Jar is complete. Should take at least 3 minutes
+
+#Step 2
+if [ -f "REMcRdy_lm_only.csv-finalTable.csv" ]; then
+  echo "REMcRdy_lm_only.csv-finalTable.csv exists."
+  #Jarflag= 1
+  #echo "Jarflag = $Jarflag"
+  echo "REMcJar... .sh completed"
+else
+  echo "REMcJar failed"
+  wait
+  exit 1 # non-zero so a caller can tell the run was aborted
+fi
+echo "start Step2"
+#Step2 If REMcJar successfully produces "REMcRdy_lm_only.csv-finalTable.csv"
+#then add shift values back to the "REMcRdy_lm_only.csv-finalTable.csv"
+#and output it as "REMcWithShift.csv" to be used to produce the REMc Heatmaps.
+if [ -f "REMcRdy_lm_only.csv-finalTable.csv" ]; then
+  echo "REMcRdy_lm_only.csv-finalTable.csv exists."
+  Rscript ./AddShiftVals2.R
+  wait
+  echo "AddShiftVals2.R executed"
+fi
+
+#Step3 Execute REMcHeatmaps_zscores.R contingent upon "REMcWithShift.csv" being created.
+cd REMcHeatmaps || exit 1 # never let the rm below run in the wrong directory
+if [ -f "REMcWithShift.csv" ]; then
+  echo "REMcWithShift.csv exists."
+fi
+
+rm -f ./*.pdf #Remove .pdf files to be sure of a clean 'slate'
+
+cd .. || exit 1
+Rscript REMcHeatmaps_zscores.R
+echo "REMcHeatmaps executed"
+wait
+cd REMcHeatmaps || exit 1
+pdftk *.pdf output compiledREMcHeatmaps.pdf
+
+#Step3b Execute REMcHeatmaps_Z_lm_wDAmPs_andHomology_221212.R contingent upon "REMcWithShift.csv" being created in ../REMcHeatmapsWithHomology.
+#cp ./Homology/REMcWithShift.csv ./REMcHeatmapsWithHomology/REMcWithShift.csv
+cp ./REMcWithShift.csv ../REMcHeatmapsWithHomology/
+cd ../REMcHeatmapsWithHomology || exit 1
+if [ -f "REMcWithShift.csv" ]; then
+  echo "REMcWithShift.csv exists."
+fi
+rm -f ./Homology/*.pdf #Remove .pdf files to be sure of a clean 'slate'
+rm -f ./Homology/*.csv #Remove .csv files to be sure of a clean 'slate'
+
+Rscript REMcHeatmaps_Z_lm_wDAmPs_andHomology_221212.R REMcWithShift.csv Homology 17_0503_DAmPs_Only.txt Yeast_Human_Homology_Mapping_biomaRt_18_0920.csv
+
+echo "REMcHeatmaps_Z_lm_wDAmPs_andHomology_221212.R executed"
+wait
+cd Homology || exit 1
+pdftk *.pdf output compiledREMcHomologyHeatmaps.pdf
+
+echo "pdftk executed"
+cd ../.. || exit 1
+
+#Step4 Begin the process of GTF contingent upon "REMcRdy_lm_only.csv-finalTable.csv"
+# being produced by "REMcJar.sh" (Step1)
+if [ -f "REMcRdy_lm_only.csv-finalTable.csv" ]; then
+  echo "REMcRdy_lm_only.csv-finalTable.csv exists."
+  python3 DconJG2.py REMcRdy_lm_only.csv-finalTable.csv GTF/Process/
+  #python2 DconJG.py REMcRdy_lm_only.csv-finalTable.csv 12 GTF/Process/
+else
+  echo "DconJG2.py failed"
+  wait
+  exit 1
+
+fi
+
+if [ -f "GTF/Process/REMcRdy_lm_only/1-0-0-finaltable.csv" ]; then
+  echo "Begin copying REMcRdy_lm_only to /Function and /Component"
+  cp -r GTF/Process/REMcRdy_lm_only GTF/Function/
+  echo "Copy to Function complete"
+  wait
+  cp -r GTF/Process/REMcRdy_lm_only GTF/Component/
+  echo "Copy to Component complete"
+
+  wait
+
+else
+  echo "COPY failed"
+  wait
+  exit 1
+
+fi
+
+if [ -f "GTF/Function/REMcRdy_lm_only/1-0-0-finaltable.csv" ]; then
+  echo "Begin concurrent running of GTF tasks"
+  sh ./mProcess.sh &
+  sh ./mFunction.sh &
+  sh ./mComponent.sh &
+
+  echo "GTF ontologies run in background"
+
+else
+  echo "GTFontologies failed"
+  # NOTE(review): unlike the other failure branches this one does not exit, so CompileGTF.R still runs - confirm that is intended.
+fi
+
+wait # returns once any background GTF jobs started above have exited
+
+Rscript CompileGTF.R
+wait
+
+echo "REMcMaster3.sh Finished"
+
diff --git a/workflow/.old/templates/qhtcp/REMcRdy_lm_only.csv b/workflow/.old/templates/qhtcp/REMcRdy_lm_only.csv
new file mode 100644
index 00000000..e37a9e80
--- /dev/null
+++ b/workflow/.old/templates/qhtcp/REMcRdy_lm_only.csv
@@ -0,0 +1,832 @@
+OrfRep,Gene,FY4_Z_lm_K,BY4742_Z_lm_K_1,FY4_Z_lm_L,BY4742_Z_lm_L_1
+KML14-13_-6_,VPS8,-59.3847278181521,-6.06157169672863,0.772743415948687,8.39820521641744
+KML12-19MG-C1DNMT,VPS8,-61.7704731480934,-7.00152259748668,3.59681346254071,8.99468364704015
+CQ10-30A-4CONTROL,VPS8,-22.1493418068443,1.01057802879191,-1.32259812578781,-1.40227673581777
+NBP13-2_-7_,VPS8,-51.1031045178275,-3.73433070180383,4.71147533646182,8.19513739350017
+CQ10-27C-1HDAC,VPS8,-26.4720466459974,-0.331443770649795,3.06034059896547,0.705141331827187
+PSC03-61_-6_,VPS8,-25.3220571929659,-0.72622940144637,2.6849735823816,1.68355485772886
+EG12-32D-3AHDAC,VPS8,-41.324395003364,-4.09835822552595,2.32241487629303,3.03853435965829
+CQ10-06B-1CONTROL,VPS8,-39.0716102878534,-5.34095716930887,2.01966216347308,3.77845508412703
+EG12-30A-6CONTROL,VPS8,-37.8775642922419,-1.76184832277848,1.49657970926092,1.19181087929409 +CQ10-30A-4DNMT,VPS8,-35.4936750499954,0.522291578085531,-0.327827611198834,0.87541470554711 +Lys_3,VPS8,-39.8518983279553,1.52099978286979,4.48531893673844,-2.86418009670572 +PSC07-4_-7__2,VPS8,-7.23278458251803,-3.71137209074474,3.57258904477228,8.02246684078295 +PSC10-14_-6_,VPS8,-15.1064754799911,0.75356131177783,3.54251842817478,1.3233468188676 +PSC03-17_-6_,VPS8,-8.18234185181037,-3.62751532438468,3.51942828772159,8.07587628895911 +Farnesol_1,VPS8,-13.1295215797973,-1.94911804017062,3.39747359540882,0.968785243900746 +PSC08-79_-7_,VPS8,-4.19181905498484,-3.47276039612587,3.36530291687454,3.9612329984847 +PSC11-37-M37A-1_-6_,VPS8,-5.58451903151947,-3.35601750693486,3.33705911091163,7.50674629779757 +Leucine_2,VPS8,-19.9462802819614,-2.18975358413756,3.29533708404183,0.337399876432149 +Tryptophan_2,VPS8,-29.1403100346624,-6.86896685809796,3.28961619940997,-1.79505709927979 +KML12-33_-7_,VPS8,-5.77846047153248,-1.30037937389063,3.23000650860923,1.70474451122617 +Tryptophan_1,VPS8,-14.1189265737416,-4.75429893154531,3.22587287318308,8.79792713362979 +Lys_1,VPS8,-11.2856668684289,1.10570501845037,3.19620576755786,-2.89936157176371 +PSC04-37_-7__1,VPS8,-19.2630326544322,-4.01291286825037,3.12775096546675,2.17147652385328 +BGC13-12_-6_,VPS8,-4.69555012893993,0.129023264065035,3.11114352910146,1.03744784040905 +PSC08-33_-7_,VPS8,-14.159063585086,1.01346543098873,3.10783804064975,0.058814946877905 +CWR15-17_-7__1,VPS8,-7.02325099437684,-0.523296278142047,3.09577687412093,2.54467584732745 +CQ10-27C-1DNMT,VPS8,-30.1815185878921,-4.8516766534435,3.00074866606415,8.61559522313556 +Leucine_1,VPS8,-12.3090074166839,-4.13037516639965,2.98954779517816,8.58577321143547 +Tryptophan_3,VPS8,-16.2255648659991,-6.41818568544464,2.88948835765507,8.76545071972336 +Lys_4,VPS8,-23.6715403311731,1.24856043860688,2.86242601835046,-3.02321820724638 
+NBP13-28_-6_,VPS8,-29.7682159041289,0.808646398523773,2.8197403546427,0.814308638277034 +PSC07-29_-6__2,VPS8,-4.20358371764322,0.420677042639075,2.74458947513955,1.23580868999463 +NBP13-34_-6_,VPS8,-2.18953233936,0.0844892628259896,2.66049213000546,1.79366419823232 +SCVX93-627_-6_,VPS8,-0.502906968341118,-5.25722144186007,2.59251093585805,4.33320840298761 +Lys_2,VPS8,-9.05511778005427,0.95016280147417,2.5883837871097,-3.02765037942577 +KML12-21_-6__2,VPS8,-12.7680679724564,-0.854527535508487,2.57707941409955,1.00022864383892 +PSC08-79_-6_,VPS8,11.4409455317132,-1.63328386021317,2.57613442646025,1.45738080754952 +EG12-32B-2HDAC,VPS8,-21.1304723231736,-4.12523897023883,2.55796613325006,2.08992366904526 +Thr_1,VPS8,-13.5724306324034,-4.63736993300665,2.54780799581994,3.82990695548651 +PSC04-51_-7__1,VPS8,-10.0546589346468,0.160659044715004,2.52702658654835,-0.104948290282127 +SCXII93-541_-7_,VPS8,-3.08456007861081,0.678584229510604,2.45727424464177,0.484358042700346 +Ser_2,VPS8,-9.66306084475538,-1.74036556613552,2.44522147118079,0.359571309608067 +KML12-2MG-F3DNMT,VPS8,-2.88255603640969,-0.788722772502747,2.40161044079616,-0.110960165565157 +SCVI93-650_-6_,VPS8,-2.16923473076524,-0.0874923800997822,2.39812871648861,0.924040026966393 +PSC08-42_-6_,VPS8,-4.74362634045679,-2.99227286684029,2.38630188119521,1.54791111526214 +EG12-30C-3HDAC,VPS8,-5.44722316266923,-6.62367919514257,2.37301907145697,8.80552613682307 +BGC11-27_-7_,VPS8,-29.8429995486121,-3.59094369535735,2.37231364524654,7.83839016321656 +PSC07-79-6B_-6__1,VPS8,-21.1616778607605,0.715583731817298,2.36192900020766,0.0773495160863704 +PSC08-51B_-6__1,VPS8,-3.23542308874182,-7.52027885702353,2.35214222508142,9.00266742287267 +KML12-25_-6_,VPS8,-0.868912289374969,0.775343556872083,2.30364161991375,-1.35882296402362 +Pro_1,VPS8,-3.42203424672533,-4.45106694960332,2.29775305342292,1.28515250724191 +BB15-08_-6_,VPS8,-1.7636760292316,-3.48806290172213,2.2673589137703,8.27121312571654 
+KML12-50_-7_,VPS8,-2.46219516194658,0.227902425967862,2.2520981934181,-0.242005609709674 +EG12-30C-3CONTROL,VPS8,-6.2322299130099,-0.980409544776534,2.24740102785834,0.926083546179913 +Qercetin_1,VPS8,-3.60150427167056,-7.54148748236098,2.24566503071291,9.01970857241558 +Met_2,VPS8,-11.583300392335,-4.18665672207283,2.21628767780497,8.55319319908789 +Qercetin_3,VPS8,-12.6388812344232,-7.05544182663451,2.19928366279483,9.12045250085229 +KML12-16_-7_,VPS8,-3.34034413533202,-3.58873236600511,2.1925561670846,1.83658287629382 +PSC04-46_-7_,VPS8,-0.684950228760758,1.82850109221622,2.15753621532798,0.753083888649431 +Met_1,VPS8,-17.4042847358604,-6.43500569702338,2.14381312233427,8.87530965784294 +Ile_3,VPS8,-5.23878097432242,-3.46732345856629,2.13829863537208,3.71123577421947 +NBP13-72C_-6_,VPS8,-3.35223422298993,-6.04271344042061,2.12815886242004,8.41260064598755 +SC95-1668_-6_,VPS8,-3.06624398271039,-3.36316982113876,2.11888615650921,8.1466276850671 +Tryptophan_4,VPS8,-8.90757650942717,-6.56936327515137,2.11399520145026,8.69727418510771 +Ile_1,VPS8,-7.38203527450982,-6.01933171414364,2.06390831884063,3.00945096522038 +SCB93-42-_726_,VPS8,-4.89740447033515,-4.13693558542637,2.06224691848987,7.69093858835016 +EG12-25B-7BHDAC,VPS8,-2.88050776524786,-7.38809125662153,2.06019307413239,8.73420554285671 +PSC08-37B_-6_,VPS8,-5.62829134486666,-6.12431852502124,2.04402044943558,8.19514788767809 +Ala_4,VPS8,-12.3231834761076,0.594392109186782,2.02118868053896,0.270870649190131 +Ile_4,VPS8,-8.19888581384635,-3.86206024184973,2.00805559050418,8.97605571062824 +PSC07-43_-7_,VPS8,-2.44337837753993,0.679106106983496,2.00505225868417,-0.463405842470903 +PSC07-69_-7_,VPS8,-0.234098941506491,-3.60050801889806,1.9400192595955,3.02695021127285 +Tryptophanol_1,VPS8,-3.94348261335724,1.02941142058836,1.92918975101339,-0.370772557214418 +KML12-42_-7_,VPS8,-4.49745044559542,0.688082765025664,1.91810969450136,1.26212957214766 
+PSC08-24_-7_,VPS8,-6.94055748298722,-3.26625960733787,1.866619541704,7.72894559022456 +Farnesol_2,VPS8,-11.6437764834434,-6.65975019187317,1.85525054330942,8.78964827900868 +PSC11-37-M15A-1_-6_,VPS8,-1.58641206818955,-3.40366815267497,1.84652190749881,7.67593481927814 +KML12-61_-7_,VPS8,-3.50248426901029,-4.06294845983622,1.84206474901754,8.39754333343814 +EG12-25B-7BDNMT,VPS8,0.379476110270691,0.633432861727241,1.8398641435749,-1.52328294847224 +Qercetin_4,VPS8,-12.0120293395496,-6.51857576772642,1.83794526296682,8.6549724506806 +PSC04-21_-7__2,VPS8,-3.23997176795898,-0.235956869431461,1.83317313542729,0.614774947807729 +PSC03-1-1A_-6_,VPS8,-19.4814461302424,1.41230237981683,1.82898679574404,-0.776943145459301 +EG12-25B-5HDAC,VPS8,1.91485511571641,0.803150979619018,1.82725675916228,-0.724599451212079 +PSC08-52_-7_,VPS8,-9.52434888720067,-2.34306734204259,1.80415607785767,1.17618402784505 +PSC07-33_-6__1,VPS8,-2.38981886826098,-2.26630794940395,1.77386462949448,0.992779542596733 +Gly_2,VPS8,-7.52503595785521,-1.77749265372066,1.76265483942652,0.238916710489834 +EG11-07C-2DNMT_1,VPS8,-5.60333379294226,-1.13076508323083,1.75861850416733,0.809522806313864 +EG12-40B-5CONTROL,VPS8,-15.8457536915885,-0.136903405706871,1.74986708451992,-0.86732930987168 +Pro_4,VPS8,-9.77739989272224,-6.59571985796382,1.7327877423449,8.7814223638835 +KML14-43_-7_,VPS8,-2.51931714580922,1.59295479593636,1.72288907202666,-0.574194118016521 +SCXV93-627_-6__1,VPS8,-5.7311287180774,-3.6795540127766,1.70802971486316,7.55256178029826 +PSC07-57_-6_,VPS8,-1.51879787851869,0.557144061163078,1.70012002212442,0.741100039910471 +NBP13-4B_-6_,VPS8,-1.76958111912926,0.592285181661068,1.64927663071672,1.10572123074176 +Gln_2,VPS8,-6.31454816089284,0.726440587951234,1.64693899420562,0.658081730819998 +Pro_2,VPS8,-8.78986008990014,-7.60370261014225,1.64485514896333,9.01568053412605 +NBP13-24_-6_,VPS8,-14.6273317519134,1.17531529508368,1.64173019193462,-0.511707856778301 
+PSC07-7_-6_,VPS8,-1.57517742730342,-4.5201186087166,1.6069178138675,8.40226470860369 +PSC11-32_-7_,VPS8,-2.17266672288972,0.792118907892301,1.60143170612068,1.13832375181232 +PSC07-AM16_-6_,VPS8,-2.27540496989116,0.221104997583241,1.57038565534946,0.865462098536407 +PSC07-37_-6_,VPS8,-6.39841500292332,-1.26000337344952,1.56348139766926,0.868823401167989 +PSC07-24B_-7_,VPS8,-2.80568062661159,-0.12508023672096,1.55891072232654,0.585817475694795 +PSC11-37-M37A_-1_,VPS8,-0.679008725153562,-3.43339163553847,1.54880727600808,7.16541936511268 +EG12-30A-1HDAC,VPS8,-2.02461060286936,-4.12847277012433,1.53323645160597,1.01936302651786 +PSC11-32_-6_,VPS8,-1.08365607245669,0.885311558526146,1.5326218388847,0.598172036280399 +PSC08-J3_-7_,VPS8,-3.09163647616545,-3.30320572902501,1.52868929047026,1.79833365702571 +PSC08-35_-6_,VPS8,-3.84582711331293,-0.871664854073376,1.51811032981806,0.600818438335709 +NBP13-80_-7_,VPS8,-1.65807222558747,0.831212544930451,1.49872002821334,-0.165650564703603 +RT175_2,VPS8,-4.36102344831458,-3.7514435735063,1.49767324058385,2.05316078567735 +Met_4,VPS8,-6.86913603768521,-5.38443759605136,1.49594048677732,8.88599830998787 +PSC03-110_-7_,VPS8,1.14754340523006,-3.6653063773484,1.46833075356906,8.61754222767328 +PSC07-69F_-7_,VPS8,-1.83098778281485,0.841016622629793,1.45635634635578,-0.509622048448881 +PSC07-AM17_-7__2,VPS8,-1.19436285289658,0.307404502318286,1.45204675979439,0.137372095282122 +KML12-14MG-C1DNMT,VPS8,0.321922219353183,0.000228952339917235,1.44270107875834,-0.5248556793283 +SCIII93-577_-7_extraunextracted,VPS8,-1.40294665011384,0.511935811133818,1.43545221632018,-0.452750008387724 +PSC07-32_-7__3,VPS8,-1.4243397044707,0.609498000556354,1.43173129576887,-0.0947641162577423 +PSC07-34_-7_,VPS8,-1.8609046796807,-4.27840664248997,1.41259802877186,7.67972856222393 +PSC07-69G_-6_,VPS8,-1.72824144387796,-0.0102827267918121,1.39283879736744,-0.367073282696887 
+BGC11-6_-7_,VPS8,0.148839765957055,-3.8426037724401,1.38997898329368,8.35005947710692 +EG12-25B-7ADNMT,VPS8,-7.62829714618537,-6.42069512561629,1.37175709452715,2.96897005006579 +NBP13-40_-7_,VPS8,-0.436628959512128,-3.17543156416925,1.34840991946625,2.87502008261607 +EG12-25B-4DNMT,VPS8,-1.48504338126373,0.395594827733189,1.31978988619111,-0.677500289419047 +CQ10-29B-2CONTROL,VPS8,5.28192260757526,0.467585363648098,1.31668674123545,-1.02808729147271 +NBP13-5_-7__2,VPS8,1.3380306002961,0.872737840869983,1.30612002356863,-0.557975901027384 +Thr_2,VPS8,-7.87913500736024,-0.423365251575647,1.30345156648988,0.43307294051567 +PSC07-AM16_-7_,VPS8,-3.40796135947867,-0.74173164099518,1.28215580328229,1.38612569350455 +KML14-23a_-6_,VPS8,-7.68868928346341,-6.64428827247055,1.26414148820527,8.62088776430597 +PSC07-48_-6_,VPS8,-8.27397695187682,-3.95765439694135,1.25288953473163,8.80168199787225 +PSC08-22_-6_,VPS8,-1.0868756512755,-3.90914343578741,1.24715287905489,7.89032290274878 +KML12-8MG-C2DNMT,VPS8,1.40518051517026,0.290534575997662,1.22554656614777,-1.75364438130931 +PSC07-92_-6_,VPS8,-2.24300182585702,0.25026617209396,1.22078939881839,-0.176355849769085 +KML12-33_-6_,VPS8,-1.25192180127375,-4.30850900114966,1.21655684155584,8.68371548594376 +PSC08-86_-7_,VPS8,-2.03751218245904,1.18477967840693,1.21361918734851,0.238838349101716 +EG12-24A-2ADNMT,VPS8,0.0514624261939759,0.77754391748443,1.21258861641351,-1.40878687237766 +PSC03-58_-7__2,VPS8,-3.55568469865347,-0.881204280841511,1.20516866784845,0.673359014980345 +CQ10-24C-2DNMT,VPS8,-14.9786623480012,0.655402087216137,1.19348616113604,-0.785593315908515 +EG12-28D-2DNMT,VPS8,-18.073799337432,0.417067989780462,1.18785167063461,-0.74000087439007 +CQ10-28A-3HDAC,VPS8,-2.78071801722753,-1.22007946121976,1.18645703490825,0.688997120790878 +SC95-1672_-7_,VPS8,-1.24263731683206,0.647856851474319,1.17833976848001,1.13705405521153 +NBP13-44_-7_,VPS8,1.09753625561547,-3.84297064801632,1.16661915253554,8.43791306914286 
+KML12-17_-7_,VPS8,-1.0505155508043,-1.78077777475324,1.15692886674153,0.824587202628629 +PSC08-42D_-6_,VPS8,-3.17385054037941,0.0887274470307813,1.14898592977908,0.176655614582461 +KML12-60_-7_,VPS8,-1.72500466969631,0.683963827938775,1.14724263512114,-0.311145585877092 +PSC08-45_-6_,VPS8,-1.86087534641468,0.327009116854274,1.13992485173138,0.0111002056155133 +EG12-25B-5DNMT,VPS8,3.23626210642434,-2.19247433734489,1.13316330096272,0.330278593939243 +NBP13-9_-7_,VPS8,0.767023174975367,-3.4943181212287,1.12906372059501,8.26809703174893 +NBP11-18_-7_,VPS8,-0.302907703148754,-0.285100160130153,1.12857397642134,0.946077430928206 +PSC11-1_-6_,VPS8,-0.958649830577411,0.756733125204131,1.1269725778402,0.490570791283905 +KML12-61_-DCM_,VPS8,-1.92307299679092,-0.134845821816152,1.11690687556869,0.803689977690897 +EG12-30A-1CONTROL,VPS8,-1.22325915440329,-0.260353413409403,1.11495762889068,0.353011800166924 +SC95-1667_-7_,VPS8,-0.00914819334132018,-3.33312502656556,1.11323136176195,3.01982995190371 +Phe_4,VPS8,-9.9366289526209,-6.47388037168125,1.11137292857311,8.69780996092287 +PSC08-46_-6_,VPS8,-0.98074182585912,-1.24092314925672,1.09724494433122,1.26704772811865 +PSC07-18_-7_,VPS8,0.928629240913673,0.390402323921037,1.07417831808504,0.861683702447192 +KML12-49_-7_,VPS8,-2.34773878091564,0.768463889095804,1.07177419312652,-0.100960835902209 +SXBI93-42_726__-7_,VPS8,3.4844437897864,0.913545255453172,1.06537022047143,-0.557216743188716 +NBP13-13_-6_,VPS8,-0.737717739857149,-0.148566322100427,1.0651456192527,1.45530864087039 +NBP13-43_-6_,VPS8,-1.20199961698142,-0.511505433805913,1.06075738172302,0.375964899904487 +Ile_2,VPS8,-5.59924433106212,-3.31529926001099,1.05900678955255,1.54283987081717 +PSC07-50_-7_,VPS8,-1.11425269190068,0.504400855315958,1.05078654711429,0.0562939699042167 +KML12-27_-7_,VPS8,-1.11701103039861,-4.56242015313628,1.04058543544377,8.01663230695369 +PSC07-92_-7_,VPS8,-0.93686532884296,0.290573182823189,1.03606503185397,-0.400303660722067 
+PSC07-29_-6__1,VPS8,0.31326587140113,-1.18649517809439,1.03292953397384,0.744372330773908 +CQ10-24C-2CONTROL,VPS8,-5.55578861468958,0.493869187443201,1.01761935270344,-2.06489568475027 +EG12-30A-1DNMT,VPS8,2.60266254887398,0.21315644615817,1.01609579425587,-0.923154012231783 +SCXXV93-627_-6_,VPS8,-1.48489064598203,-3.83691566207184,1.01530847373826,7.81588280440224 +PSC07-22_-7__1,VPS8,-1.25296363796346,-0.00569239808079719,1.00004369921244,0.346533039402701 +Asn_2,VPS8,-5.90912702226192,1.10975610803914,0.9956765433856,-0.335822308128861 +PSC07-85_-7_,VPS8,-1.40838038477129,-0.0572935043762748,0.991844538477016,-0.242066315053466 +PSC07-78_-6__1,VPS8,-1.08445312809892,0.701901495694419,0.970182277973893,0.487084326510906 +CQ10-30C-1DNMT,VPS8,-3.01923894114928,-0.361245612865802,0.9684341183076,0.211473880701457 +PSC07-26_-7_,VPS8,-4.90617612836743,1.21114402827303,0.965140507656166,0.615588672247558 +EG12-20E-3BHDAC,VPS8,-3.23277702584832,0.539203423651625,0.951434968305804,0.236434349279269 +KML14-43_-6_,VPS8,-1.14384489985644,-3.28003093613504,0.947375929674324,7.44903120374846 +PSC07-73_-6__1,VPS8,-1.99056074997031,-3.95903579027844,0.943514289066032,8.44806313904152 +SCX604_-6_,VPS8,0.2364531741028,-3.11919524016148,0.939742731576689,2.00681259673281 +PSC08-21C_-7_,VPS8,-0.765330458102444,-4.23125822465818,0.933564981809759,2.58036623404231 +CWR15-6_-7_,VPS8,0.424981109296994,-2.35158783565249,0.923641986449781,3.18146986619603 +PSC11-33_-7_,VPS8,-2.50132776180276,0.450149698845697,0.918854818235586,0.894793351867301 +KML14-24_-6_,VPS8,-1.57927902708673,0.0523944264606447,0.909056691423199,0.326012586953193 +PSC04-42_-6_,VPS8,-1.48279584619134,-3.93836903115492,0.897287425395246,7.9939220285913 +SC508_-6__1,VPS8,-3.10194560193402,0.556032458719175,0.887058745344082,0.595086636025943 +EG12-30B-3HDAC,VPS8,-4.46286854794026,-4.2706700824535,0.877787661102703,1.96041359264094 
+KML12-21_-6__1,VPS8,-1.17514248320916,-1.60449961624755,0.877536302338068,1.12873401269934 +PSC07-36_-6_,VPS8,-0.459065873542568,-1.02489374549508,0.874862980250975,1.03185476378994 +BGC11-3_-6_,VPS8,-1.64941284315962,-6.72384574618896,0.873893221920322,8.8180160907878 +Ser_4,VPS8,-6.00216101566999,0.660257637965246,0.859057378963831,-0.918096029345318 +CQ10-27C-4ADNMT,VPS8,-2.08912659275321,-0.169478200299639,0.852589350365484,0.341433790524106 +PSC03-19_-7_,VPS8,1.34253275088438,2.10111416719401,0.852007981867927,0.271498769084958 +EG12-2C-9_?_DNMT,VPS8,-4.55089970225382,-0.253563409414306,0.841512537192361,0.498884695053459 +PSC10-48_-7__1,VPS8,2.34482718651517,-0.0607736012999129,0.834581521966255,0.00408396382984304 +SC95-1625_-6_,VPS8,-1.41806744300182,0.38972019385579,0.831261153768909,0.436382745917406 +BGC13-37_-6_,VPS8,0.958391379513961,-1.05379232505938,0.823081453066877,1.31927615042091 +NBP13-40_-6_,VPS8,-0.53050856017516,0.166998102712512,0.815980162548596,1.13937211263714 +PSC04-46_-6_,VPS8,-0.13716463520953,0.610948383605336,0.81391091233135,1.00730661409215 +PSC07-50-2.0_-7_,VPS8,-0.540476813162717,-3.26758297272761,0.807123414851502,6.72633965970696 +KML13-7_-6_,VPS8,-1.1215162154627,0.45519896905071,0.779559088387823,0.811061243425377 +PSC11-37-M33E-2__,VPS8,-0.553075956664799,-6.89451578503918,0.778362296334274,8.83332611490885 +CWR15-9_-7_,VPS8,-1.19826114280161,0.839694510187522,0.77701460855743,-2.1636588455648 +KML14-32_-6__1,VPS8,-1.87752652659542,-4.08687098161068,0.774293527686098,2.35611080387528 +NBP11-26_-6_,VPS8,-0.28854654070154,-1.99041904107381,0.769420410991092,1.65743629114243 +NBPB-61_-7_,VPS8,1.26333596710288,0.837320533081677,0.764287016349483,-0.293969042955047 +PSC07-61-G,VPS8,-0.146142637595884,-3.43938711563306,0.75978769446253,1.55465600535861 +PSC08-55B_-7_,VPS8,-1.74107120754049,-1.92013596477951,0.745940259035358,0.54981026165193 
+KML12-10MG-C1HDAC,VPS8,-3.72374408311646,0.642804154627986,0.740077113141583,-0.738039205078295 +NBP13-29_-7_,VPS8,-1.35589507992386,-3.88251437439173,0.737240813274841,8.05724770951522 +PSC08-21C_-6_,VPS8,-0.864676161164735,-4.51241049329075,0.733120151204288,8.23072368453052 +BGC11-42_-7_,VPS8,0.845094168236814,-6.55541044858566,0.73080521806548,8.71017509378745 +EG12-31B-3_?_CONTROL,VPS8,-1.25062102264949,-3.55189037497772,0.712001960801391,3.10622832089492 +PSC08-78_-6_,VPS8,-1.84197359668576,-0.216998862112379,0.703781718363127,0.595498613864717 +PSC08-67_-6_,VPS8,-0.261257499874414,-4.03188275459269,0.698383991601289,2.55322889911939 +BakersFungus_-6_,VPS8,-6.47199294902385,-5.14162750550334,0.697930734990223,8.26557128019764 +PSC03-45PSC04-55_-6_,VPS8,-1.87189858548708,0.355647385899706,0.689078852301475,0.806593602955233 +PSC08-98_-6_,VPS8,-1.40748116844395,0.748575891322727,0.687478264555061,-0.353259747612713 +PSC08-7_-7__1,VPS8,-1.41493384099721,0.072700817138778,0.68601308620766,-1.14530862825408 +NBP13-34_-7_,VPS8,0.750599580479279,-0.163750226670696,0.679182614487408,1.71257856994675 +NBP13-78_-6_,VPS8,0.675220167248254,-1.45365098410745,0.675577643301718,3.53742178826769 +PSC03-25_-6__1,VPS8,-0.828402037507726,-2.48054373487782,0.672792426022625,1.6581390990726 +KML12-29_-7_,VPS8,-1.19293058032119,-0.175694812980679,0.667575515404378,0.114017931026017 +PSC07-61-D,VPS8,-2.16322748018248,-0.0924370235231872,0.666630892950189,-0.17989381177193 +KML12-56_-7_,VPS8,0.567016829339483,-1.26370049099003,0.665824112399186,-0.124109352280184 +KML12-10MG-C1CONTROL,VPS8,-4.72385774214234,0.201726198710177,0.662649694452273,-0.0851068509758296 +PSC08-66_-6_,VPS8,-2.91654418830091,0.0643207373502594,0.662389416505668,0.718932699229641 +CWR15-2_-6_,VPS8,-1.39899677412029,-1.2249937217568,0.659971507356832,1.24526712822355 +PSC07-9BT_-7_,VPS8,1.87208620236553,-3.96098554918899,0.655122715703564,8.11833115091074 
+PSC07-73_-6__2,VPS8,0.182151229745923,-2.0967823302816,0.654288366771924,0.691222414994518 +CQ10-06B-1HDAC,VPS8,-3.53390727736255,-0.18194911871623,0.649359302480921,1.33133946177282 +PSC07-28_MeOH,H2O_,VPS8,-4.7158608869298,0.703098078843036,0.647223848536846,0.717262279151568 +PSC04-47_-6_,VPS8,0.485376281050823,-1.34387815103852,0.645532973415408,1.25213118750743 +BGC13-11_-6_,VPS8,-1.29410506079612,-6.045417021185,0.645345670594019,2.4245935154389 +EG12-31A-8HDAC,VPS8,-3.11741738252231,-3.21795492650365,0.639889563732006,1.19851139497658 +PSC04-10_-6_,VPS8,2.62052145042126,-5.6995023023386,0.639429820443143,8.65002436118002 +NBP13-78_-7_,VPS8,-0.945032114700042,1.11272392208558,0.638507090526519,0.225319132622557 +CQ10-24C-2HDAC,VPS8,-6.06724748700336,-0.0687212389539658,0.626166996852428,-0.742546065293024 +GlutamicAcid_1,VPS8,0.125401474914166,1.00177156060193,0.625512653229654,-1.47804041528374 +KML14-4_-6_,VPS8,-1.20450912846412,-4.41341905177404,0.623277181893859,7.88863986414419 +PSC07-29_-7_,VPS8,0.62087472873831,0.579440188233813,0.622685272544881,1.27300299055297 +KML12-47_-6_,VPS8,-0.555766525203296,0.131665889850295,0.619895190257441,1.54659447211942 +NBP11-23_-6_,VPS8,0.622663046473674,-4.02134229167388,0.614713956367077,3.58541550127852 +EG12-25B-4CONTROL,VPS8,-3.21429099930337,-0.197835256314151,0.613992313462161,0.542914144489868 +PSC04-42_-7_,VPS8,0.723695918078154,-4.69710554661713,0.598859704996455,8.43551145660922 +EG12-30A-6HDAC,VPS8,-1.7042660506193,-6.2449092254981,0.594666878635412,4.30622590079548 +NBP11-43_-6_,VPS8,1.34493605571426,0.686239117774631,0.592407082257878,0.976577432647739 +PSC07-56_-7_darker,VPS8,-1.7135829028028,-0.312326995616263,0.589872412908695,0.739326284331447 +Ala_2,VPS8,-0.331378166552182,-2.02739474958535,0.5818289324404,-0.152261022003257 +KML12-31_-6_,VPS8,3.65322335650478,0.819837923293204,0.57771718955182,-0.556580360202665 
+PSC11-37-M13E-1.2_-6_,VPS8,-1.48443345162886,0.511117186522695,0.577069332606841,1.0090554736594 +PSC07-6_-6_,VPS8,-1.06554834389421,0.512453576637129,0.576502559134329,0.790016269522199 +EG12-30C-3DNMT,VPS8,-0.924537265178598,-4.83695877209263,0.575330092122519,8.59184715622732 +EG12-39B-1HDAC,VPS8,-5.22401015764284,-6.36167933861325,0.56669551313997,2.92808909351643 +EG12-40B-5HDAC,VPS8,-2.63679586473229,-0.424762407462804,0.56224727384067,-0.305616283973089 +PSC08-35_-7_,VPS8,-0.739209690456507,1.05383217949829,0.561691041219575,-0.57920674241577 +PSC08-37B_-7_,VPS8,-1.01592758419754,0.551023965545858,0.558574192538113,-0.240174218158172 +NBP13-79_-6_,VPS8,-0.235232823962006,-1.40810738011443,0.557983904858583,1.25614020558761 +EG12-25B-7ACONTROL,VPS8,0.0485543868901536,-6.98969383165329,0.556508185659764,9.00608806668302 +PSC07-44_-7_,VPS8,1.15588922515907,-2.77585628835827,0.552968081252044,3.251046716214 +PSC11-20A_-7__2,VPS8,-5.07966419002819,-0.0891411656811373,0.538434679647838,0.945703649710246 +CQ10-29C-2BHDAC,VPS8,-3.10846062146652,0.864175463763291,0.535142690665855,-1.00365987038 +SC95-1602_-6_,VPS8,0.712271116708852,-4.18881881852378,0.533816165056366,8.18262390768868 +PSC04-9_-7__2,VPS8,0.0829370206347525,-0.423700948203536,0.533757784956193,1.48070154976241 +KML12-2MG-F3HDAC,VPS8,-2.10439405197129,-2.81850403777255,0.526549464254263,1.64751369576237 +NBP13-35_-6_,VPS8,0.486106578225563,-0.273014853083334,0.518630041498836,1.98994947072619 +PSC10-50_-7__2,VPS8,-0.613471128418632,-3.47227659360797,0.515959151915915,8.35472014692867 +SC95-1621_-6_,VPS8,-4.83617380624567,-0.522284505181906,0.513810439895656,0.68697201780525 +PSC07-5_-6_,VPS8,0.242236884968637,-3.7182570092167,0.511361719027284,3.03153039535789 +EG12-24A-2ACONTROL,VPS8,-4.24921515316099,1.16727650877054,0.504697468425578,0.391482618151205 +KML12-2_-7_,VPS8,0.02113587509577,1.80016036985866,0.503810415236837,-0.891585528253245 
+Val_2,VPS8,-4.44269737583699,-0.418884232722709,0.499054869576902,0.533312468932889 +SCXV93-627_-7__2,VPS8,0.845494719041802,-4.93377566702827,0.486478822997945,8.65198227903797 +EG10-29C-1ACONTROL,VPS8,-5.23891449125742,-6.28565130545347,0.480481889374607,2.35600500298448 +SCXII93-541_-6_,VPS8,-0.0163601308148119,-4.54647107955946,0.478504263481243,8.73284861779511 +PSC03-25_-7__1,VPS8,-2.54519616688346,-3.43428804839522,0.478070466903568,7.48160985193101 +PSC07-G2_-7_,VPS8,0.0180498132119436,-1.20348332354053,0.472398678004806,0.450217425559532 +EG12-31A-8CONTROL,VPS8,-2.9956863515181,0.0869020752001179,0.461463760908493,-0.184414995719464 +KML12-53_-7_,VPS8,-1.65170994133666,-0.917960605835929,0.458088255949873,0.112216210063897 +NBP13-3_-7_,VPS8,1.3148532741716,0.751003552475905,0.457972306584251,-0.318588811316854 +SCXVI93-653-2_-6__1,VPS8,-6.5833733830903,0.638827537377217,0.457318773796201,-0.167728870125363 +KML12-2MG-F2HDAC,VPS8,-2.70790071305966,0.230800336531392,0.456613347585776,0.407493482055926 +RT175_3,VPS8,-5.47240121983477,-0.114834807619721,0.449044205431387,0.863090497633401 +PSC07-57_-7_,VPS8,0.652679069548839,-4.42427842077827,0.441693177817923,8.46611786320102 +NBP13-9F_-6__1,VPS8,-1.06508508058945,-0.622216673962081,0.441135323527381,0.994230410206181 +PSC07-78_-7__1,VPS8,-0.965614964019557,1.52784689755354,0.4380014473167,-0.725426135219685 +SC95-1668_-7_,VPS8,0.604283226581556,1.02835932748203,0.424477534944655,0.0534571887263053 +PSC07-5_-7_,VPS8,0.918175471798869,-3.25844309233728,0.419911724610282,2.6667775626013 +EG10-29C-1AHDAC,VPS8,-5.85317230008854,-0.843363195426855,0.418153024092567,0.605330833311606 +Phe_2,VPS8,-0.867936199660815,-4.24287271118776,0.417074613908813,6.5664206395988 +PSC07-32_-7__4,VPS8,1.10356575902072,0.0355038260707286,0.414840764242466,2.02121954360463 +PSC08-53_-6_,VPS8,0.668325838241337,-6.89137047097241,0.413652080536163,8.70746415683347 
+EG12-27E-2HDAC,VPS8,0.688007448249649,-4.81224686289195,0.410548205210293,8.76300075743224 +SCXVI93-638_-6_,VPS8,1.90725577683305,-1.22735410651875,0.407792177981289,1.51547335616936 +PSC11-19_-6_,VPS8,0.56550768337729,-1.31823731512492,0.407071345911096,1.54646555852418 +EG11-07C-2HDAC_2,VPS8,-3.34159838532741,0.0125463572254147,0.406352135510353,0.868343556119022 +PSC07-58B_-7_,VPS8,-1.32526103527839,-4.62913160543504,0.401838218598355,8.25220007562413 +SCXV93-624_-6__1,VPS8,-1.32753082317328,-3.84510225084721,0.398727045759965,7.78656212335033 +PSC04-49_-7_,VPS8,-1.03153895067243,1.32973939899747,0.394685845492427,0.221614742486169 +EG12-29D-3CONTROL,VPS8,-8.99639258147978,-0.380201906864224,0.392462010563366,-0.714771324260278 +PSC-77_-7_,VPS8,1.4211661336599,1.20215012280289,0.38894513564207,-0.845327374200623 +PSC07-32_-6_,VPS8,1.53338104857006,-2.76708602834489,0.375020670916059,3.81594506745542 +PSC07-50_-6_,VPS8,-0.0165047741610572,-3.70116766711795,0.372691142752209,7.78143149867606 +PSC07-69_-6_,VPS8,0.0140776843958994,-5.18713867062521,0.365993647926792,8.54062923917032 +PSC07-23_-6_,VPS8,-0.648883460949773,-4.45672551587122,0.35858342937843,8.42447740777173 +PSC07-61_-7_,VPS8,0.456715657164231,-4.4103013757929,0.334475691345834,2.42022997065327 +PSC08-2K_-6_,VPS8,1.01774168010229,-6.82272867739724,0.334379202013604,8.68551917505238 +EG10-02A-1BHDAC,VPS8,1.93795759143795,0.781298602599102,0.330402057689311,-0.678109730145776 +EG12-28D-1DNMT,VPS8,-4.37701210128001,-3.98196173402362,0.325297853097786,8.05927474159681 +SC95-1667_-6_,VPS8,0.342430218269747,-4.7250365568943,0.322389388940552,8.34938626166508 +NBP11-1_-7__2,VPS8,2.20101022918083,0.774085636843856,0.320546361611477,0.311901322851427 +NBP13-53_-6__2,VPS8,-2.34426531762195,1.0143386534182,0.311763399874321,-0.777595216343186 +PSC07-19_-6.1_,VPS8,0.396174830572216,-4.14097491021328,0.305305101292671,8.32078961120233 
+CQ10-23B-4CONTROL,VPS8,-3.57954983929157,-2.01506276011223,0.303657485132231,1.13506612572294 +PSC08-53_-7_,VPS8,-8.7957398924995,0.227045651416719,0.301217602981313,-0.165027180335853 +KML14-32_-7__2,VPS8,-0.418798379738953,-3.76051435345104,0.296811607552208,7.59794823289732 +SCXVIII93-526_-6__1,VPS8,0.792952770646198,-4.86813721034788,0.295466432744052,8.58728845774149 +PSC11-20-M2E-1_-7_,VPS8,-0.432549612551317,-4.95883777897175,0.286465356465972,8.48160522877584 +PSC04-9_-7__1,VPS8,-0.748388979737284,-4.76232492506419,0.2834239154139,8.1205363236802 +NBP11-18_-6_,VPS8,1.35760195768384,0.309125475810095,0.282963361290312,0.358694911677952 +Gln_1,VPS8,-0.325040158107735,-1.98529926053033,0.279878135163108,0.365876139331085 +PSC07-81B_-7_,VPS8,1.08001822684921,-1.20433393013729,0.279443527750709,1.30748055645596 +EG12-25E-4BaDNMT,VPS8,2.48759219224045,0.463702864812224,0.27942974356039,-1.63997430713982 +EG10-29C-1ADNMT,VPS8,1.49807087672401,-0.0432491864873475,0.276566686147733,-0.444184757211568 +SCXV93-626_-7__1,VPS8,-1.14521041396433,1.21797972081905,0.27615234960345,-0.463963444926528 +PSC07-64_-6_,VPS8,0.252821136542637,-4.58762504180139,0.275372326598359,8.36316125908718 +CWR15-6_-6_,VPS8,0.419595926252268,0.0617076949017208,0.271165716046996,1.70222251112871 +CWR15-17_-6_,VPS8,0.676972071274076,-3.31741981562928,0.27054056247431,3.88917208204988 +NBP11-38_-7_,VPS8,-0.667139878826221,-3.19840902524327,0.266464496313613,8.46415789909553 +ScotiaCocktail_30mgml_,VPS8,1.39372941501074,-0.198362502191778,0.261647327214605,0.657091347008906 +PSC08-55_-6_,VPS8,-0.356739303757362,-4.70704223533552,0.261205422289685,8.09872400694818 +BGC11-10_-6_,VPS8,0.119663281185261,-1.48630059370235,0.256620151921922,1.00180834694256 +PSC04-21_-7__1,VPS8,-0.37568555913128,-4.60000241290997,0.255367412272373,8.09335669964138 +PSC07-90_-7_,VPS8,2.05293084484603,-4.08256952225054,0.254336841337373,8.42496543781088 
+EG12-29D-3DNMT,VPS8,-5.50457981266004,-2.4110648128284,0.250404292922934,0.993809487193666 +SCXV93-626_-6__1,VPS8,-0.236248361861498,-1.46864562385553,0.247520964642162,1.35682104139173 +EG12-31C-2BHDAC,VPS8,-0.160699017985937,-1.83245658733396,0.2406013011022,0.51842158451702 +NBP13-53_-6__1,VPS8,0.209100409283973,-0.10845691435393,0.237957169065192,1.67840418972147 +KML14-48_-7__3,VPS8,0.135949312778632,-4.300295684572,0.235863593806207,8.07199694465775 +EG12-39B-1DNMT,VPS8,-4.52597957553089,-4.63384083511215,0.234057864874464,3.91917552769884 +NBP13-53_-7__1,VPS8,1.05830857551771,0.0662041336405929,0.231365893588703,0.965776314988967 +EG12-31A-8DNMT,VPS8,0.131738471866682,0.56961452269416,0.214823243536873,-1.50688466169927 +PSC11-17-M1E-2_-7_,VPS8,0.902394174679451,1.27027734907331,0.214479449613632,-0.0201153006617577 +KML12-102_-6_,VPS8,0.55371571043675,-5.0913537658335,0.214295390131142,8.32504751129926 +EG12-3B-5HDAC,VPS8,-5.27128931351746,0.466123215797255,0.210533117008874,-0.226738556787044 +PSC11-20A_-7__1,VPS8,0.589783489739684,-4.86126885048795,0.207087880263937,8.14850920968342 +PSC07-32_-7__1,VPS8,-0.519569274933103,-5.14901568685137,0.20698166091501,8.44823365967015 +EG12-40B-3DNMT,VPS8,-1.83187587273094,-0.279730841625938,0.201640692583895,0.577695919194337 +PSC07-51_-6_,VPS8,0.230151579367889,-3.04713354432851,0.201064189094686,3.14039008259368 +PSC08-81B_-7_,VPS8,1.72833701499063,-4.04733370874309,0.194583997975471,3.10563831951985 +PSC07-27_-7_,VPS8,-1.29308244245311,0.226517377546697,0.191596072015222,-0.245400334384469 +KML12-19MG-C4HDAC,VPS8,-3.69636704249126,-3.20226753410223,0.191572557808208,1.71635491978842 +PSC07-84_-6_,VPS8,-0.0558497873228986,-4.3650735973958,0.187635955220147,8.38486376053436 +SC95-1603_-7_,VPS8,2.07041144841082,0.778198520197748,0.180744670895545,-0.307398565583544 +KML12-2MG-F2CONTROL,VPS8,-1.24466131218752,-3.87863832906367,0.179412469442984,2.39816426026711 
+PSC07-32_-7__2,VPS8,0.729169100920944,-6.18253856394962,0.179377603549826,3.80041370755841 +KML14-35_-6_,VPS8,-0.020259432211771,-6.59635930498263,0.175283699025185,9.00057521286042 +PSC10-48_-7__2,VPS8,1.22969981440392,-3.8367226279442,0.172709298774498,7.90916850158387 +NBP13-37_-7_,VPS8,-0.0314910386221077,1.04662926522425,0.170981409976317,0.220905717712319 +PSC08-59_-6_,VPS8,1.05713221040106,-1.06291118871629,0.164065800609977,0.553872823209496 +KML12-21_-7__2,VPS8,0.773833550151947,-5.25915810454858,0.162822790977125,8.57723865397246 +CQ10-23B-4HDAC,VPS8,-2.61932841056262,-0.277459777981043,0.1600035186396,0.0457411302807321 +PSC07-61-A,VPS8,1.25507207802034,0.414544154227596,0.159584317086967,-0.478690629538911 +PSC08-22_-7_,VPS8,-0.421099523883719,1.11702172984932,0.158043731110178,-0.825870629432518 +PSC07-14M_-7_,VPS8,0.45285175798488,-3.95907873751631,0.153742252896032,7.33268770716329 +PSC03-32_-6__1,VPS8,0.25165185186951,-4.09926149166533,0.15311953182752,8.10750957077653 +KML12-14MG-C1HDAC,VPS8,-0.561100122159501,-2.95219338509465,0.151217313563547,1.30852065418956 +PSC03-58_-6__1,VPS8,0.285965704162756,-4.34288289665631,0.14349978865455,8.5223688671327 +KML12-47_-7_,VPS8,1.61089268679325,-3.54867047435157,0.141317021575857,2.66514738539158 +SC95-1666_-6_,VPS8,-0.812728970025006,0.0764863420253217,0.138593427735837,1.69570589478493 +PSC03-32_-6__2,VPS8,0.403127826111177,-4.03747331990474,0.135016024930784,7.65829150802647 +PSC07-83,VPS8,0.00924174147012767,-3.65338395347329,0.134987645715422,8.12412953436672 +PSC03-36_-6_,VPS8,1.91363727943056,-3.94273959793601,0.132863258736901,8.4406195725605 +PSC08-7_-6__1,VPS8,-0.499168494161307,-4.72024142925379,0.130273452626445,8.33189937119893 +NBP13-37_-6_,VPS8,0.142837572833953,-4.95788722866984,0.12322405553054,8.16202944928635 +Gln_3,VPS8,-2.52362913591434,-0.753370913563636,0.122524305163187,0.381731566523545 +Val_1,VPS8,-0.887019006445511,-1.96242311730565,0.120897770705587,0.563482264614341 
+NBP11-25_-6_,VPS8,0.883421620515309,-4.28585833092378,0.120006663343221,8.38849926033679 +GlutamicAcid_3,VPS8,-5.63605859141069,1.09349623809469,0.11964178771714,-1.36755668958086 +BGC11-76_-6_,VPS8,1.0531084954965,-1.08559406937057,0.116067628250987,0.130585033221333 +PSC08-85_-6_,VPS8,0.441848748751129,-4.29344125972011,0.114302441055475,8.21802671852262 +Pro_3,VPS8,-4.64786536480066,-0.765964962624958,0.113345656080416,-0.102564752935117 +EG12-25B-4HDAC,VPS8,-1.55089858646524,-2.15802771856957,0.110732335762945,0.565638746585065 +PSC08-96_-6_,VPS8,-0.334001976623188,-1.76625349863689,0.109645817231944,0.400941660361597 +Tryptophanol_3,VPS8,-4.00119530850803,0.797605988636514,0.107620352089827,-1.01138820631083 +PSC08-33B_-7_,VPS8,1.389560045268,-0.290166677763139,0.10496730087085,8.33913558459553 +EG12-28D-1HDAC,VPS8,-2.53501145462251,-2.49319112532234,0.104876487381691,0.640692641093627 +PSC08-58_-7_,VPS8,1.18512943392257,-3.5446712412681,0.101692339418083,3.27445578811252 +PSC07-16_-7_,VPS8,-0.522867750122591,-3.26249383926192,0.0988836079319774,2.55553297385346 +Gln_4,VPS8,-3.90959459916727,-0.357512287148684,0.0945351013038038,0.125364373655154 +KML12-63_-7_,VPS8,1.04880763180262,-2.68974958930811,0.0904371426055424,2.56087402098346 +NBP13-13_-7_,VPS8,2.10205091603647,0.593132475837895,0.0853053696333807,-0.432747938649257 +PSC03-25_-7__2,VPS8,1.06336401219268,0.29149437823113,0.0844839940573342,-0.30056000629291 +EG12-24A-2AHDAC,VPS8,-2.67468635249626,0.847025695068276,0.081011999766484,-0.068689124851195 +EG12-32B-3DNMT,VPS8,0.583323090771494,-2.63952222351745,0.072276066443358,0.763918087310158 +KML12-54_-7_,VPS8,0.187053931139191,-3.96181810880814,0.0698241022360864,8.1143590433874 +KML12-18MG-C1bHDAC,VPS8,-3.13510837640891,-1.10902738447372,0.0689913749738954,1.13692275432748 +BGC11-11_-7_,VPS8,-0.720875387701313,-4.24152855750735,0.0670405066264439,7.90836125692793 
+PSC07-24B_-6_,VPS8,0.571342980331645,-4.6393789762997,0.0598256992467177,8.24541983438849 +Uracil_4,VPS8,-2.45274681581195,0.529933673555301,0.0572699481946943,0.490350819672969 +NBP13-9F_-6__2,VPS8,1.77733166118152,0.546932441589521,0.0545236509823851,0.723155131035812 +CWR15-2_-7_,VPS8,0.804176285121075,1.36025946523395,0.0527892755064091,-0.352000964332155 +SCXIV93-551_-7__1,VPS8,-0.499128034484043,-5.08296534788443,0.0485542857396843,8.41552234643853 +KML12-14MG-C2DNMT,VPS8,-2.2207914860193,-5.32569548346833,0.0471718125341958,8.63851353666488 +EG12-40B-3CONTROL,VPS8,-4.23321132781659,0.546289375235312,0.0464947655391325,-0.818719676350231 +PSC04-21_-6__1,VPS8,0.384555822752016,-6.82821781412625,0.0429854728509488,9.07053804348056 +CQ10-23B-4DNMT,VPS8,-0.193960918670136,0.550575189754438,0.0423116691947841,-1.09850173881852 +SCXVIII93-728_-6_,VPS8,-0.954644322527621,-4.29633356994848,0.0379128908136857,3.06808422043509 +EG12-36A-1CONTROL,VPS8,-1.78350430557001,-4.44273750563362,0.0367209637684846,2.35291524729473 +PSC08-7_-7__2,VPS8,-0.234842388076337,1.70165985182159,0.0364614966566038,-0.776588462551746 +SCXIV93-551_-6__1,VPS8,1.37517359552248,0.836703966087001,0.0344976549535582,-0.293712238888328 +KML12-14MG-C2CONTROL,VPS8,-5.86373025287231,-0.261083859117249,0.0323011036845455,0.678931288082764 +NBP13-57A_-6_,VPS8,1.59819745155764,-3.19966305825699,0.0280936822984589,3.01892926194328 +KML12-19MG-C6CONTROL,VPS8,-2.45376943415497,0.334875885773198,0.0276088031331306,-0.00677035626506162 +KML12-50_-6_,VPS8,0.410848544026358,-5.67345422797857,0.0222418880908298,3.04039132930409 +EG10-02A-1BCONTROL,VPS8,2.1526194436732,0.848638272473027,0.0213369965381457,-0.762129336367572 +PSC07-36_-7_,VPS8,0.467561885148559,1.10434372807568,0.0206404895096913,0.195700381511791 +NBP13-80_-6_,VPS8,0.115480762047405,-4.14161968988815,0.0134727105439923,8.38726128057292 +PSC07-55C-7.1_-7_,VPS8,0.745887039569176,-5.58239523267273,0.0106550598759162,2.53164849044231 
+KML12-102_-7_,VPS8,-1.9332040999795,-3.71432539867626,0.0103866735820651,7.95058011832953 +KML12-11MG-C2DNMT,VPS8,-0.75303273919602,-4.79127593224068,0.00576896982531728,8.48976020731951 +PSC07-54_-7_,VPS8,0.851624360148301,-3.55511473023757,0.00216480947435181,2.06235423484939 +EG12-25B-7AHDAC,VPS8,0.692163668597268,-5.81571931630851,-0.00583812925771302,3.09173023790636 +SCX93-629_-6__2,VPS8,0.62918615794164,-5.29836527763577,-0.00687437603578553,8.33811621227757 +KML14-4_-7_,VPS8,-0.690338446281281,-0.83717765096044,-0.0108539528642508,1.51813215286151 +BGC11-29_-6_,VPS8,0.949954525311104,-4.0307704703082,-0.0166295286077654,8.00734234310585 +Uracil_3,VPS8,-3.62203553490155,0.679667847717068,-0.0167462888081118,0.106175687315257 +PSC07-94_-7_,VPS8,0.905622856925648,0.128529941936468,-0.0181522762206146,1.76717415961568 +PSC87-27_-6_,VPS8,-0.196176086000712,-3.79217411710256,-0.0230399879406626,2.52082588718433 +PSC04-37_-7__2,VPS8,0.760783281248301,-3.82632539863253,-0.0257765551362777,7.72070364616029 +RT175_1,VPS8,0.0805236008855637,-4.23042966278724,-0.0271347033000276,3.69869030863084 +KML12-41_-6_,VPS8,1.37972632070736,0.49117413404576,-0.0279690522316684,-0.292937393151826 +EG12-2C-9_?_HDAC,VPS8,-2.2519484719935,-1.34002241575361,-0.037125808776876,1.53017879224233 +CQ10-28B-2CONTROL,VPS8,-3.69674736345761,-3.92166354438126,-0.0383063841359316,2.6370683574563 +PSC11-12_-6__2,VPS8,-0.0255626844100189,-4.19688764157148,-0.0419535187273008,2.75084730188791 +KML12-60_-6_,VPS8,0.197976021018403,1.53577614556584,-0.0443560220163702,-0.313672360552153 +EG12-29D-3HDAC,VPS8,-4.01667720401564,-2.9355616103457,-0.0489956183106827,1.25704490994208 +CQ10-28B-3HDAC,VPS8,-1.61566542634816,0.263656686819209,-0.0556979781444457,-0.348688089559104 +PSC07-78_-7__2,VPS8,-0.023098690064238,-3.86681105159586,-0.0560296095468185,8.18936356501471 +PSC04-51_-7__2,VPS8,0.202499412937258,1.36496572864293,-0.0575434379776952,-0.91780784948293 
+EG12-40A-3HDAC,VPS8,-1.45026626566574,-3.70336277005927,-0.0591829457908895,1.96681254975097 +KML12-29_-6_,VPS8,2.35556012740314,0.695206638106614,-0.0595486322516966,-1.04631185417832 +Asn_4,VPS8,-3.96126363002576,0.808365299714233,-0.0607551543219405,-1.32502236501629 +EG12-24A-4CONTROL,VPS8,-3.44722747626983,-4.53133263509736,-0.0659428748901025,8.64436580463952 +Phe_3,VPS8,-3.58926016183653,-5.52001139991718,-0.0660588242557234,4.03078434850943 +EG12--44E-2DNMT,VPS8,-3.62990696511456,-6.53255143507991,-0.0663855906497479,3.96156346746299 +PSC08-36_-7_,VPS8,1.56756542989603,-0.319602097800761,-0.0700473202661609,0.832892317426546 +PSC11-37-M15C-1-0_-6_,VPS8,0.379205030432972,-3.75399219509803,-0.0708524791477166,1.63944841106936 +NBP13-53_-6__3,VPS8,1.12886216074227,0.0799349138488254,-0.0721181921528579,-0.176293439219006 +PSC11-37-M7B_-6_,VPS8,2.93535438313511,-3.17165586232116,-0.0727538865769642,1.87553388036833 +SC95-1602_-7_,VPS8,1.21090022536011,0.419619010021125,-0.0766499474287954,-0.166502144722988 +PSC07-20_-6_,VPS8,0.807829793978615,-4.51520377707267,-0.0789421771953147,7.88874763318149 +PSC08-32_-6_,VPS8,1.19615469597882,-3.86376602389836,-0.0810162924209083,7.60949350257944 +PSC11-1_-7_,VPS8,1.67863938190715,0.600265829293161,-0.0842020620539666,-0.789386035730536 +SC95-1609_-6_,VPS8,0.164395500375693,-4.08693917177293,-0.0849066774296666,1.38265252934059 +PSC07-58_-7__2,VPS8,1.27722779729376,-0.0264340864278283,-0.0955918574307956,0.488059022424163 +PSC07-79_-7__1,VPS8,-0.176109097566399,0.328835745120089,-0.0970513599351226,1.0912747230841 +KML14-23a_-7_,VPS8,-0.513926161445779,-5.03158480306853,-0.0977275960954617,8.38238234434652 +EG12-31B-3DNMT,VPS8,2.39837253490708,-1.56641432552822,-0.100404972356177,1.54177453603053 +PSC08-58_-6_,VPS8,1.44837627811635,-4.41882092337078,-0.100538760085742,8.20910507923263 +EG12-27D-5CONTROL,VPS8,2.07668472137164,0.780698597704134,-0.102415031638527,-0.452904841118522 
+KML12-19_-7_,VPS8,0.339806408198742,-1.49335581973554,-0.10722652489446,0.594928392882573 +CQ10-29B-2DNMT,VPS8,1.12253224423327,0.353062213467229,-0.110232289218651,-0.653844303650265 +EG12-28D-1CONTROL,VPS8,-2.86920737738616,-0.390061838817068,-0.110311751021665,0.298067325291816 +KML12-63_-6_,VPS8,1.37360982899598,0.3578948057195,-0.114610796731633,0.470204830523938 +EG12-27D-5HDAC,VPS8,1.58395564515837,0.576520004501709,-0.116177329419613,-0.500592299080333 +PSC11-37-M13E_-7_,VPS8,-0.543254370007342,-5.0304855330457,-0.116332198852018,3.76296840063472 +SC95-1626_-6_,VPS8,1.36266953226199,-0.356694576308578,-0.118062520154369,0.852371570917629 +PSC03-25_-6__2,VPS8,-0.308907873287997,-4.01168373200922,-0.119950954228025,3.35836967154457 +SCXIV93-585_-7_,VPS8,2.25851354550174,0.72452166881256,-0.141225635732775,-0.251589210080018 +EG12-31C-3DNMT,VPS8,-1.64363520124537,-4.52520922357252,-0.141344017602572,2.3046517706494 +BGC11-6_-6_,VPS8,1.19796526653668,0.289649474544948,-0.141451047786222,0.367227422893412 +CWR15-13_-7_,VPS8,1.72655982366652,0.462460593112687,-0.147729341059005,0.254757813949489 +CQ10-29C-2BCONTROL,VPS8,-1.93818064028379,-1.13447316602356,-0.147748801092395,1.09513769740512 +NBP11-43_-7_,VPS8,1.21276946245001,0.727817983540119,-0.153957362578862,0.209789136890578 +SC531_-7__2,VPS8,1.31793427859576,-0.265378582903806,-0.158981294532647,0.77955858040949 +NBP13-79_-7_,VPS8,-0.0628149207650473,-3.35646171038352,-0.162619509942046,1.27664633430431 +Qercetin_2,VPS8,0.664820007205839,-2.09282463803079,-0.164359561261095,-0.370106922781293 +KML12-19MG-C6DNMT,VPS8,-2.98839754065779,-6.40657519666326,-0.170962188423728,8.69705523662055 +EG12-33D-4AHDAC,VPS8,-1.91842620285639,-1.93888658182609,-0.171434094233461,0.383245107623277 +KML12-18MG-C1bCONTROL,VPS8,-4.21219859442607,-4.40897321310531,-0.171818429892934,8.42129003618136 +KML12-19MG-C4CONTROL,VPS8,-1.23072396485972,0.279676120764444,-0.178712957556434,0.649827509270465 
+EG10-02A-1BDNMT,VPS8,1.72328056682373,0.117053206387914,-0.181205463499935,0.958198378252586 +KML12-8MG-C2HDAC,VPS8,2.74274912396396,0.413736380647315,-0.185412074051297,-1.42562442028998 +SCX93-629_-6__1,VPS8,0.519083238185013,-6.06358008695441,-0.187791873968076,8.56675879717803 +EG12-33D-4ADNMT,VPS8,-0.974407863382401,-7.2276890342625,-0.192609043067084,8.90230273231404 +EG11-07C-2HDAC_1,VPS8,-2.42640453143296,-1.47860424307836,-0.192733911614676,1.10318865836532 +PSC07-74_-7_,VPS8,-0.391744005039998,-4.49515106858585,-0.19315716734093,3.16159705109406 +EG12-25B-7BCONTROL,VPS8,-0.131935221922298,-6.85786739924631,-0.19382853849292,8.70476856673606 +SC95-1665#2_-7_,VPS8,-0.54230862505115,-0.160677328957139,-0.196603214920593,1.35848054814957 +PSC04-21_-6__2,VPS8,-0.297247394298594,-3.41503831619083,-0.198751116106128,2.64663225553478 +CQ10-29B-2HDAC,VPS8,-1.42369639560209,0.459586212152881,-0.205016436023318,-1.37576691680821 +NBP13-43_-7_,VPS8,0.779929812024707,-4.18525636449943,-0.206717567275585,1.47093924377326 +Ser_1,VPS8,-0.792595223123199,-1.87844847448421,-0.212426654571679,0.208445697277168 +KML12-10MG-C1DNMT,VPS8,-2.22979679868782,0.576018458433976,-0.221199886292137,-1.01708359530712 +PSC08-60_-7_,VPS8,1.10487260659657,-0.584196832091947,-0.225595421334339,0.0533368011624909 +KML12-34_-6_,VPS8,1.49166408682822,0.523094097482455,-0.227779810082482,0.54435061026546 +EG12-2C-9_?_CONTROL,VPS8,-1.64594443732559,-0.635294564777655,-0.231737494373385,0.498423266232383 +PSC04-51_-6__1,VPS8,0.330999347948857,-1.97706378500547,-0.237993895108602,0.132708277988001 +KML12-30_-7_,VPS8,1.35977464235327,1.36272082169343,-0.240463697679815,-0.386681449783217 +EG12-24E-3CONTROL,VPS8,-2.34068767065929,0.229569943855095,-0.249780999500499,0.472322355690283 +PSC07-41-7.2_-7_,VPS8,0.179476845078331,-3.58218074202444,-0.250627510953009,2.61928756752744 +EG12-31C-2BDNMT,VPS8,-0.0137434011873382,-2.60414946206629,-0.265172264243358,1.54143519997955 
+PSC07-69B_-7_,VPS8,0.510140638016265,-3.6619512420992,-0.265793363642422,7.77101234722542 +EG12-25A-3HDAC,VPS8,1.28796579564139,0.688116117667836,-0.268200731939839,0.558759262343559 +His_1,VPS8,-1.11461177153646,0.885935664131724,-0.26966428861779,-2.40134925502044 +KML12-51_-6_,VPS8,1.1057394551821,0.1829322132889,-0.276605033860593,-0.162906546747666 +EG12-32D-3ACONTROL,VPS8,-2.37341853807932,-6.48335069102844,-0.280775967684071,2.52770154187096 +EG12-10E-1DNMT,VPS8,0.325615176396067,-1.21280904211173,-0.282481963944685,0.931184159223536 +PSC04-57_-6_,VPS8,1.79473134539177,0.93225117619255,-0.283328475397196,0.865275548968684 +EG12-40B-5DNMT,VPS8,-0.926477306703721,-1.71598699066249,-0.284530132459092,0.364241606686076 +SC508_-7_,VPS8,1.46679554619382,0.112043342558068,-0.288545386015443,0.383521692082918 +PSC08-36_-6_,VPS8,1.70338654350975,-3.33189212450664,-0.290250571441331,7.48714120008255 +EG12-25B-5CONTROL,VPS8,3.17760568930122,0.761687249001851,-0.292507124479969,-1.47144945194588 +EG12-31C-3CONTROL,VPS8,-1.24745808737886,-1.7010603126339,-0.304285309689892,0.62555628507358 +PSC03-16_-7__1,VPS8,0.981314821163583,-1.1172939282132,-0.305353989856951,-0.161162120716766 +SCXV93-626_-6__2,VPS8,0.91840406897545,-2.11084218227784,-0.308332185800505,0.960239169136047 +EG12-32A-3ADNMT,VPS8,0.532362104265236,-1.42483327327828,-0.30927437575052,0.879401136803256 +PSC08-16_-6_,VPS8,1.69879133566374,1.11461954006328,-0.313595313998054,-0.262199344634724 +PSC03-39_-6_,VPS8,-1.5396669800549,-1.24845890462422,-0.320832824750072,0.675666500126986 +KML14-8_-7_,VPS8,-0.260280398668332,-5.12530503875645,-0.321959074182576,3.79066606634333 +NBP13-45_-6_,VPS8,1.3910408694561,-4.23692463645244,-0.325946759358292,7.98480838307346 +EG12-25A-3DNMT,VPS8,-0.97207030552809,-5.94868407896167,-0.330191479141711,3.22766587263877 +PSC07-55C-7.4_-7_,VPS8,1.84902823228899,0.869049289932769,-0.331969639692816,-0.694663531732059 
+SCX93-624_-7_,VPS8,-1.12685891584629,0.6100523169004,-0.334088350828265,-1.37670580338945 +PSC08-69_-6_,VPS8,2.1246243814777,0.348363374447777,-0.334115919208902,0.0826755574010436 +EG12-25E-4BDNMT,VPS8,3.34910718080799,0.467875600167822,-0.336040840845165,-0.193375172671532 +KML12-54_-6_,VPS8,1.17498619283081,-4.25192567259765,-0.339425264985758,8.53185663490975 +PSC07-58_-6_,VPS8,1.4852664003598,0.648758743469496,-0.343948101079724,0.0796307410562018 +PSC08-57_-7_,VPS8,0.0611019443037158,0.679833811378288,-0.344642175604004,-1.10198342901392 +SCXVI93-653-2_-6__2,VPS8,0.324519730633965,-2.11203590989202,-0.344915426906202,0.776204439644311 +PSC08-14_-6_,VPS8,0.645152558084576,-1.50695433158886,-0.346750345888033,1.41833465296423 +SCXIII93-532_-7_,VPS8,0.0041660749565218,-2.25962908280897,-0.351250478609711,0.979419592384716 +PSC07-58_-7__1,VPS8,2.6597835212447,-4.13271555893289,-0.356657935388244,3.03991318946141 +Arg_2,VPS8,-0.181755245529513,1.21204774781024,-0.359529101148146,-1.45774505006403 +PSC08-33B_-6_,VPS8,1.3124428888982,-0.0103751318860494,-0.36135915512163,0.0499826603973105 +PSC07-94_-6_,VPS8,-0.52378618479664,0.0564481431410939,-0.370635915206082,1.09536892337754 +PSC11-20-M2E-1_-6_,VPS8,0.16838179007879,-4.30414448810157,-0.371711082050937,7.90849426301826 +PSC07-93_-7_,VPS8,2.48993581904636,0.770833754232066,-0.37261273026472,-1.00818071328628 +KML14-23b_-6_,VPS8,-0.326168983103589,-6.91321473827454,-0.372980849229701,8.92512998782767 +PSC07-9BT_-6_,VPS8,-0.00875775745565782,-3.84458140136671,-0.374851444939415,7.81680600308565 +PSC07-133_-6_,VPS8,0.92084884497453,-3.49920108510836,-0.387061805057565,7.83470487139058 +SCXIV93-551_-6__2,VPS8,-0.386438729857887,-3.74653685506783,-0.389741613822455,7.84409032679046 +PSC04-37_-6__2,VPS8,0.560160937025967,-4.03926636708562,-0.391970598480455,3.12837963263666 +EG12-23A-1DNMT,VPS8,0.0249957283073866,-2.03280956394641,-0.400500579783524,1.93442079217739 
+Fk506_4,VPS8,2.67018165830325,-3.14831221099919,-0.406894822421929,2.89120555964451 +PSC07-69G_-7_,VPS8,1.22398791946524,-4.2486745895355,-0.407366728231662,8.44244312016309 +PSC07-55C_-7_,VPS8,1.74311996957339,-4.13024609624329,-0.41936464965196,7.95456586750316 +SCXIX93-760_-7_,VPS8,2.46211169898732,-4.64979699390204,-0.419633846780537,8.37852755501568 +NBP13-578-2_-6_,VPS8,-2.49659600254596,-0.386831544045589,-0.426525131105137,-0.287317715314649 +PSC07-133_-7__1,VPS8,-0.1414017749117,-5.81744257422786,-0.432042050571496,3.05377371013804 +PSC07-56_-7_,VPS8,-0.954566437648872,1.36614220882914,-0.432129620721757,-0.349292073625716 +KML12-19MG-C4DNMT,VPS8,0.742077760954129,0.615449962306185,-0.438278991273323,-1.5102974615607 +EG12-25E-4BaHDAC,VPS8,2.60435679785969,0.886770622399802,-0.445129733861694,-1.44425004751414 +CWR15-17_-7__2,VPS8,1.17186169425359,0.666818056991967,-0.445971380305855,-0.434731434601497 +CWR15-3_-7_,VPS8,2.2487303955377,0.928755202497466,-0.448912277852076,0.376222727094977 +EG12-40A-4CONTROL,VPS8,-3.61520594137827,-0.358426971938119,-0.452382650473477,-0.343059885690494 +EG12-30A-6DNMT,VPS8,0.933064633034472,-4.1310971562379,-0.457064410173471,3.18884420130182 +EG12-10E-1CONTROL,VPS8,0.360408475864968,-4.65082109928325,-0.457646589505752,2.86153565234515 +EG12-32B-2CONTROL,VPS8,-1.02997923011365,-0.419380181942955,-0.461519946985293,0.330951887482131 +His_2,VPS8,0.215755014703064,0.97807496522409,-0.463439192778484,-2.28424932892666 +SCXIV93-583_-7_,VPS8,2.12558732179674,1.37377585191699,-0.468628535016094,-0.880340715918774 +KML12-2MG-F3CONTROL,VPS8,0.454583432172066,-2.48613319372477,-0.469659105951094,0.739820453113219 +EG12-31D-4ADNMT,VPS8,-1.8017809532851,-3.53675958334789,-0.473577059340488,2.52779097278141 +KML12-25_-7_,VPS8,1.7754998492931,-1.16667993933866,-0.474470599207029,0.23920906094836 +CC10-28A-1HDAC,VPS8,1.27167875255608,-3.0482442330014,-0.47667201548439,3.16800794464809 
+EG12-24A-4DNMT,VPS8,-0.466934269279785,-5.67404612664343,-0.482955984600244,8.79187160506583 +SCXIV93-585_-6_,VPS8,2.61399024701784,0.462252024877734,-0.491572725218851,-1.5842757900449 +EG12-23A-1HDAC,VPS8,0.642521667570043,-3.74504009465199,-0.500460284635482,3.07458821825235 +PSC11-17-M1E-2_-6_,VPS8,0.203007181887009,-1.38202938324122,-0.502273311079746,1.10022773816977 +NBP13-29_-6_,VPS8,1.00527200756747,0.437536003707478,-0.507235619594461,0.332206578267602 +PSC03-23_-6__2,VPS8,-0.707206085735416,-6.54952439257054,-0.514606918076041,8.8148768060147 +CQ10-24B-2HDAC,VPS8,-2.84423971054245,-6.08579545602371,-0.517083207325051,2.5800439500542 +PSC11-37-M35D-1_-6_,VPS8,1.28660129302544,0.150281575701604,-0.518488383902828,1.22078718677722 +PSC07-33_-7__2,VPS8,-0.0337274472832381,-5.19296875467779,-0.520529254904713,3.81706913943936 +EG10-65D-1DNMT,VPS8,-2.40823105589491,0.365482874477806,-0.525030198461114,0.210880809955076 +His_3,VPS8,-0.356092960412962,1.54543961674973,-0.525266962200705,-1.88947872678831 +Tryptophanol_4,VPS8,-1.37621696432498,0.495600326637591,-0.525926170831827,-0.898818672278533 +PSC11-12_-7__2,VPS8,0.503767227354211,1.13277411421442,-0.527212154705084,-0.594490165318748 +EG12-30A-2DNMT,VPS8,1.21873119589584,-0.552062817794188,-0.534429394588985,-0.481241618143237 +KML12-41_-7_,VPS8,1.84027478111148,0.831982054349335,-0.539025205808168,-0.698659512143408 +KML12-53_-6_,VPS8,2.20213703119282,0.644034889968415,-0.546587050450035,-0.686351674210047 +KML12-12MG-C4DNMT,VPS8,-1.1960065272857,-1.15487081958216,-0.549872552754223,1.02885496489105 +KML12-16_-6_,VPS8,1.11597878800736,-4.03740410523777,-0.55161260407327,8.23423777364536 +CQ10-29C-2BDNMT,VPS8,-2.2411032555011,0.0694844573753299,-0.55224181181958,-0.0348285025825927 +EG12-31B-3HDAC,VPS8,1.82396750818755,-4.26943900444894,-0.553321032838057,2.97447282885278 +PSC04-9_-6__1,VPS8,0.156857862500172,-1.23404462369421,-0.553368061252086,1.1023022921377 
+PSC08-7_-6__3,VPS8,-0.0570119915525046,1.49173660866914,-0.553698070985009,0.212125610544086 +KML14-48_-7__2,VPS8,1.43208518906332,0.623724844206208,-0.556730592855112,-0.556378122737108 +KML12-43_-7__1,VPS8,3.05440597201208,0.528838290546974,-0.559469592554901,-0.137241827973605 +CQ10-28B-3DNMT,VPS8,-3.58048445783653,0.0309187515430705,-0.560248804725267,0.122231568665937 +EG12-24A-4HDAC,VPS8,1.57211107463739,-4.61341954121613,-0.567146575727665,8.72748335673585 +KML12-30_-6_,VPS8,2.13437314071606,0.764620682413722,-0.569939090519279,-0.780092320428865 +EG12-40B-3HDAC,VPS8,-3.03683080882656,-0.452480737535444,-0.571864822990267,0.38876827078465 +KML12-55_-7__2,VPS8,-0.415992501120232,1.00652671076039,-0.57775229392578,-0.276540811585142 +PSC08-25_-6_,VPS8,-0.0850070537480011,-5.48597411829711,-0.582079718851112,8.50377461570421 +PSC10-14_-7_,VPS8,1.22203978600466,1.02843688379722,-0.582181073191691,-0.484296324684541 +PSC07-340_-6_,VPS8,-0.322352624045038,-3.78063033360553,-0.589403178083939,2.94269051200594 +BGC11-45_-6_,VPS8,-0.285456432849994,1.24935404873637,-0.592892199904007,-0.145376685083097 +EG12-24E-3DNMT,VPS8,-0.298824310220218,-6.39327788658189,-0.60127704179137,8.70171829373111 +PSC08-77_-6__1,VPS8,1.82605320455085,0.594071946666204,-0.60684504384538,-0.496184340830209 +KML12-12MG-C4CONTROL,VPS8,-2.52833358488898,-0.11058474439169,-0.623960143212796,0.890066520041638 +KML12-19MG-C3HDAC,VPS8,0.02961217748396,-2.30555502150049,-0.633037437954989,1.68366952569235 +KML12-12MG-C1DNMT,VPS8,1.67229935047884,0.766055188691259,-0.635124526536178,-1.4182132527284 +EG10-64C-1DNMT,VPS8,3.02527095840948,0.617817543014902,-0.639614118406436,-2.0398901987265 +PSC08-42E_-7_,VPS8,1.20810142718491,-4.64472647852084,-0.646673245519035,8.0937850474605 +KML14-23b_-7_,VPS8,0.433207573178049,-2.85196344118385,-0.653501284735114,0.858734299553331 +PSC11-18-M1D-2_-7_,VPS8,0.322661619955312,-1.24597196257622,-0.653751832665023,0.896828686090645 
+EG10-64C-1CONTROL,VPS8,0.708601423980404,0.595865336511223,-0.653947243833658,-1.92675760557846 +PSC08-46_-7_,VPS8,0.939395561035389,1.20401056059638,-0.658526027523626,-0.660327156909575 +CQ10-27B-1CONTROL,VPS8,-2.26192077094878,0.806928394787774,-0.660797175587303,-0.939340171342736 +KML12-19_-6_,VPS8,1.51239056800204,0.448320329266826,-0.672803205354848,0.131552908309445 +EG10-64C-1HDAC,VPS8,2.12430373853533,1.01929631807862,-0.673284841181277,-2.76491287691296 +KML12-18MG-C2HDAC,VPS8,3.67817686246146,0.325690773717455,-0.673914048927586,-0.510255703104877 +EG12-32A-3ACONTROL,VPS8,0.742091921841187,-4.09269501541793,-0.679690435505825,2.90292441932657 +KML12-8MG-C5HDAC,VPS8,1.01395667729362,-5.65248166247934,-0.688926653854047,2.82110888464941 +Arg_4,VPS8,-1.32042913832035,1.05735111169021,-0.701389994406281,-1.87548648608533 +His_4,VPS8,-3.02398688427699,1.22270391698426,-0.704137913288039,-2.18978465211868 +KML12-18MG-C2CONTROL,VPS8,3.05205121879492,0.484687273586022,-0.714824714958617,-0.104306109594701 +KML12-19MG-C6HDAC,VPS8,-0.171323729237208,-1.27322141389504,-0.717472901169248,0.485805762837428 +CQ10-27B-2BHDAC,VPS8,-0.0724898410923911,-3.67047866397676,-0.718562663039145,2.66367442820146 +EG12-30A-4HDAC,VPS8,1.78686092667069,-0.283698778052034,-0.722816302004537,2.13467953635964 +CQ10-29B-1DNMT,VPS8,-1.27694611166564,-6.44920558442819,-0.725029880802767,8.78299865657457 +2PE_1,VPS8,1.68600911212201,0.913688831724442,-0.726543709233644,-0.378188158312343 +KML12-8MG-C2CONTROL,VPS8,2.07284408650672,0.334703982600359,-0.72657614262263,-0.8063333989277 +PSC03-110_-6_,VPS8,0.146426346207867,-1.90478933049527,-0.735363969368131,2.23424653141754 +PSC03-23_-6__1,VPS8,0.24661967950898,-3.50506120938317,-0.739622473341871,2.63209230257263 +PSC07-23_-7_,VPS8,0.908303310544829,-0.383784692293735,-0.744265312975081,5.57282376167116 +KML14-38_-6_,VPS8,1.39137668477744,0.407652150542569,-0.746144827866766,-0.453892837640813 
+KML12-49_2,VPS8,2.00489307002259,0.906542342810784,-0.750683880655226,-0.619322061342573 +CQ10-26C-1HDAC,VPS8,0.93899197575462,-3.38986004110663,-0.752030677132831,2.76383892754208 +BGC11-45_-7_,VPS8,0.34475968418861,-1.07869098621194,-0.75433020441187,1.40273337960948 +EG12-31C-3HDAC,VPS8,-1.25404896880625,-0.540774547518182,-0.756199178452136,0.501317001300258 +EG12-3B-5CONTROL,VPS8,-2.34925298433748,-3.47846122081569,-0.75661189332697,2.04710184670082 +KML14-13_-7_,VPS8,0.616063061618901,2.19559580607071,-0.756748924395432,-3.66158332508611 +KML12-14MG-C1CONTROL,VPS8,-0.230448467124742,-6.15058844955699,-0.7673992385034,8.74098995468849 +EG12-27E-2DNMT,VPS8,0.55292573523805,-5.87271726846013,-0.776743297869998,3.54714070391612 +CQ10-28B-3CONTROL,VPS8,-1.60162288385945,-0.652765752428397,-0.777490887486103,0.527742242078015 +EG12-36A-1HDAC,VPS8,-2.75110557943313,-5.86822429211125,-0.786082492228246,3.33922975410446 +EG12-40A-2HDAC,VPS8,-0.233525425581183,-1.79986306043531,-0.788523915584095,1.10459033793267 +SCXV93-624_-6__4,VPS8,1.19635901734904,0.580532829929084,-0.788553105634182,-0.316768333085585 +CQ10-27B-2BDMNT,VPS8,0.955974925288974,-4.09656323658165,-0.798036628573414,2.74854186298881 +CQ10-27B-1DNMT,VPS8,-2.59449426065389,1.03935701857418,-0.801722683231566,-1.37010938339147 +KML12-43_-7__2,VPS8,2.20971614023754,1.05702249678771,-0.813640332014127,0.0513229525383536 +PSC08-64_-7_,VPS8,1.60256305073515,0.722306230978409,-0.81486063827469,-0.968386314182558 +KML14-32_-7__1,VPS8,1.31963661951693,-0.293033976997552,-0.840935461349229,0.974621219996079 +KML12-44_-6_,VPS8,1.66670782308004,0.450157694342107,-0.85047817522336,-0.0282180998930666 +NBP13-33_-6_,VPS8,2.11362744119553,0.074294776459028,-0.850917647644106,0.161052636021427 +RT175_4,VPS8,-6.08453995306883,-1.5847554231885,-0.851927136876267,0.654886515280702 +PSC08-51B_-6__2,VPS8,1.69433369072044,-3.9777082406666,-0.853062305490743,2.57326677818987 
+2PE_3,VPS8,-1.42113833250666,0.732089520387083,-0.85968114934787,-0.334158367834675 +KML12-11MG-C2CONTROL,VPS8,-1.14404922122666,-0.369497422047609,-0.86586781829677,0.545839255353412 +EG12-32D-3ADNMT,VPS8,-2.33092171605808,-6.49882666314814,-0.878943339066095,8.81227875371687 +EG11-07C-2CONTROL_1,VPS8,-3.56165654701863,-1.12087613904815,-0.882343979901181,0.93513102969384 +KML12-6MG-C3HDAC,VPS8,-0.247800611214201,-3.68459836797471,-0.886638160602802,2.64400760201888 +PSC11-14_-6__1,VPS8,1.29798765770133,0.279946025879008,-0.888749574225731,0.383064696798185 +KML12-8MG-C5DNMT,VPS8,0.480942911913897,-6.61524257228461,-0.890945314660018,2.99717586724772 +PSC04-51_-6__2,VPS8,2.11012059866811,-2.17483002583484,-0.892400762990723,3.58299274418693 +PSC03-34_-7_,VPS8,1.42003022822052,-3.03700484947488,-0.895114626814049,4.55322817884049 +CQ10-27B-1HDAC,VPS8,-0.415733559185705,1.00764596603657,-0.895755186246504,-1.70727231947676 +EG12-31D-4ACONTROL,VPS8,-0.730862859035574,-4.76467537256631,-0.896023572540355,8.50826953947494 +CQ10-29C-3HDAC,VPS8,-3.43393950676222,-1.31375538250097,-0.898691218784376,1.17790943366659 +KML14-38_-7_,VPS8,2.34623720626804,0.5762900768692,-0.905093569770028,-0.399350791449275 +CQ10-26C-1CONTROL,VPS8,1.08755991069246,-2.945565611204,-0.909240989386491,3.0351723749442 +BGC13-12_-7_,VPS8,-0.253711770063437,3.62186912095244,-0.92162811147461,7.89646403266832 +2PE_4,VPS8,-0.426717350072744,0.952632496094194,-0.927856943829192,-1.56463931653422 +ScotiaCocktail_300mgml_,VPS8,2.14185919250307,0.692503703434139,-0.9327762781035,-0.496239589513887 +PSC07-23_-7.1_,VPS8,1.72684000693161,-6.70113811524449,-0.934472544347419,8.92581692302085 +KML12-8MG-C5CONTROL,VPS8,2.26995149626616,-3.59818909649608,-0.938289954230961,2.28841752472117 +PSC11-14_-6__3,VPS8,2.2315866187862,0.778150775662035,-0.950510855200532,-0.109606913856338 +SCXV93-624_-6__3,VPS8,-0.142291887811655,0.610577392571858,-0.952359558372682,-1.89029858997077 
+EG12-20E-3BCONTROL,VPS8,-0.671885798970071,-4.04242253567091,-0.95331634334774,8.33017506660221 +GlutamicAcid_4,VPS8,-0.339337596563252,0.944269092626762,-0.962894733949753,-1.58620642460227 +GlutamicAcid_2,VPS8,2.67228050406167,0.665280751474858,-0.970694153165935,-2.04278973149575 +KML12-18MG-C2DNMT,VPS8,3.49482371999019,0.636802620800405,-0.97540753542019,-0.853987434846299 +CQ10-29C-3DNMT,VPS8,-1.24477662226774,-3.25711389816962,-0.976336752014611,1.75852808166155 +CQ10-30C-1HDAC,VPS8,1.4392809426659,0.214520820510043,-0.983921300028768,0.754771360790367 +EG12-23A-1CONTROL,VPS8,1.01077351218435,-5.07913972780678,-0.994662427625896,3.13507597772301 +EG12-40B-2CONTROL,VPS8,-0.511914303993495,-4.09932385304966,-0.996961954904935,2.35743711332478 +EG12-30B-3CONTROL,VPS8,-1.0201991146254,-6.4323381674902,-0.997943875756459,2.65095282912199 +Ser_3,VPS8,-1.4708956436141,0.343694918314689,-0.998607949395928,-1.21913178296623 +KML12-15MG-E1DNMT,VPS8,-2.15622795601466,-2.07490779431449,-1.0011093745214,1.18624993865421 +PSC07-79_-6_,VPS8,2.67045476112483,0.616295086276843,-1.00786524944699,-0.985492261564349 +PSC03-23_-7__2,VPS8,2.23251213390376,0.910244257649145,-1.01402353918053,-0.700608562928665 +SC572_-7_,VPS8,0.07551975029911,0.723758098905295,-1.01535736230254,-1.03130399261121 +KML12-19MG-C3DNMT,VPS8,-1.56648972310148,-3.64030206490951,-1.03044375318894,7.86882400293219 +KML12-11MG-C2HDAC,VPS8,0.028293192004945,-4.35194281859486,-1.03260706023424,1.99409482720822 +EG12-25E-4BCONTROL,VPS8,2.63549557697912,0.371482694984728,-1.04519121516045,-0.175068419022917 +PSC11-14_-7_,VPS8,0.0659480021488039,0.818064179525082,-1.04682180379167,-0.924356182663635 +EG12-30A-4CONTROL,VPS8,1.39765198072212,-1.7076319253556,-1.05123355552836,2.24640397015644 +PSC03-23_-7__1,VPS8,0.953064863001283,1.03236529962677,-1.05542394938523,0.593031521409677 +EG12-31D-4AHDAC,VPS8,1.24396994257727,-5.90894737556931,-1.06305228247452,2.94487650836394 
+EG12-25E-4BHDAC,VPS8,2.81530040575377,-0.354171197641222,-1.07043087846862,0.464147596753691 +KML12-6MG-C3CONTROL,VPS8,2.17919942865615,-3.50718686921475,-1.07055088200786,2.83974789058365 +SCX93-629_-7_,VPS8,0.281142910632097,0.508670678842262,-1.07182794669915,0.145207518167737 +KML12-12MG-C1HDAC,VPS8,2.38227565230494,0.525894234547015,-1.07807056324405,-0.70781476469463 +CQ10-27C-4ACONTROL,VPS8,1.47127443246767,-1.7998450919304,-1.07912708089024,0.298278010687753 +BGC11-42_-6_,VPS8,0.887195496912733,0.653792822231329,-1.07990548222588,-0.120785223145643 +EG12-25A-3CONTROL,VPS8,1.35579644458564,-4.48601781652308,-1.08221311785217,8.57927228298952 +EG12-28D-2CONTROL,VPS8,-2.8281165291501,0.43116724815187,-1.08288854317778,-2.09874607578115 +CQ10-24B-2CONTROL,VPS8,-1.25463259965087,0.0413232910228899,-1.08467481207613,-0.316859089110542 +PSC08-42E_-6_,VPS8,0.526416554690318,0.0656741464499107,-1.08575484392934,0.821988546349337 +KML12-18MG-C1bDNMT,VPS8,-0.957504821710653,-3.8818175668795,-1.08640026837014,2.03068821307127 +PSC07-28_MeOH,CHCl3_,VPS8,1.11099617875148,-0.209482752821772,-1.08683163244364,0.637351879038532 +Thr_4,VPS8,2.27157291783278,-0.912680494227388,-1.09182232017372,0.631712420808432 +EG12-32A-3AHDAC,VPS8,1.32777204912412,-3.45679730906832,-1.09606622912241,2.89713251565448 +2PE_2,VPS8,2.04075248198756,0.914483469846332,-1.10181666898946,-1.58451690621379 +CWR15-8_-6_,VPS8,0.438518917311752,1.0711515669386,-1.10320157469913,0.386387802808955 +KML12-2MG-F4DNMT,VPS8,0.690623166385186,-4.1543209535864,-1.1081363148332,1.87173051016628 +KML12-27_-6_,VPS8,5.07982247364063,0.0122258520407051,-1.11864878703799,0.154045261308446 +EG10-65D-1CONTROL,VPS8,-0.658470381479053,-3.56814202098301,-1.1234992003607,2.43122479974617 +EG12-40A-3CONTROL,VPS8,0.703484286297605,-3.99002975752168,-1.12497329789007,2.55459920289107 +Phe_1,VPS8,0.291097002732611,-1.137236638026,-1.12573872587012,1.62814903213373 
+KML14-31_-6_,VPS8,0.587137426846179,-3.65933580100174,-1.15146083583945,8.59044308937117 +NBP11-?_-7_,VPS8,1.16017390498203,-0.262601861221593,-1.16468960437173,0.384081681827343 +PSC07-64Basel_-6_,VPS8,1.03666365966984,0.39124299325798,-1.17293255018228,0.245873348156534 +KML12-2MG-F4HDAC,VPS8,-1.32368614234063,0.189821932546382,-1.18149658654379,0.878346296194542 +SC575_-6_,VPS8,3.74556953539925,0.595361962901454,-1.18675322806354,-0.394716040763074 +BakersFungus_-7_,VPS8,2.36574989712375,0.0532442335077712,-1.18686350158609,0.486947227925486 +CQ10-30C-1CONTROL,VPS8,2.20714998520665,-0.312105748808428,-1.21430214866745,0.969609618720602 +KML12-2MG-F4CONTROL,VPS8,-1.03239366135477,-0.00379563788931985,-1.21927013302524,0.612848405745451 +CWR15-4_-6_,VPS8,0.749340273024223,0.528248565575964,-1.21991555746604,-1.86815034760035 +CC10-28A-1DNMT,VPS8,1.14512492802121,-3.63678507099332,-1.2242405498872,2.98607880387869 +KML12-12MG-C4HDAC,VPS8,-1.26692829557343,-4.92962276695597,-1.22953611147373,2.36596170249027 +KML12-14_-6_,VPS8,0.0517496899026084,0.616955171616272,-1.23639577324407,-0.672982174842829 +CQ10-06B-1DNMT,VPS8,-1.22283736226773,-1.03888346556565,-1.23704930603212,1.59508133078826 +PSC07-78_-6__2,VPS8,2.6992195686804,0.34191123728674,-1.24034129501411,0.650646690370409 +EG12--44E-2HDAC,VPS8,2.25507548442567,-3.33788965709833,-1.2417075515251,2.3246153027258 +SCXIII93-526_-7__1,VPS8,1.86978505822077,0.876473793678778,-1.24277704252688,-1.10146777463294 +EG12-30A-4DNMT,VPS8,2.03837041848825,-4.05282536183007,-1.25006806837073,8.55062413529692 +CQ10-29C-3CONTROL,VPS8,0.213098836890237,-0.229048646310856,-1.25517146212753,0.985958795552527 +KML12-14MG-C2HDAC,VPS8,0.893410103341504,-1.45924954491591,-1.26861591269656,1.34063590540454 +CWR15-8_-7_,VPS8,-0.0984487700292387,0.695842394292678,-1.28249334900854,-1.2181257112573 +CQ10-29B-4ADNMT,VPS8,-0.351589798332744,-6.09887776572433,-1.30257286012919,2.898368712111 
+NBPB-61_-6_,VPS8,0.729238893864234,0.68502414498427,-1.31047120118178,-0.124240994205487 +KML14-42_-7_,VPS8,2.29813166148517,0.916499477155714,-1.31512052749279,0.604856103881495 +PSC08-77_-6__2,VPS8,2.39542909338564,-0.586669610689159,-1.31524458520566,1.22819664913252 +PSC11-19_-7_,VPS8,-0.612302855237446,0.551263601995437,-1.32210749031489,-0.23411152772781 +SCXIV93-551_-7__2,VPS8,1.72663163959368,0.543789868835811,-1.3256240805156,-0.902701768033728 +KML12-46_-6_,VPS8,0.184711315825221,0.479978954192255,-1.34075912148548,-0.749588226214245 +KML12-44_-7_,VPS8,2.68077501330464,0.818440196299159,-1.34737066783008,-0.297743346549175 +EG12-35E-2CONTROL,VPS8,-13.1841552935078,-0.279126153654502,-1.35872802981792,-0.64216636891907 +NBP13-96_-6_,VPS8,-0.0485387236400951,0.503093477431358,-1.35918696227206,1.30045988113395 +PSC08-64_-6_,VPS8,0.0395875109150421,0.452688954284497,-1.37375036476108,0.194362135618292 +EG12-36A-1DNMT,VPS8,-0.589075966008297,-0.133809833924022,-1.37422064890136,0.204862454888159 +KML12-15MG-E1CONTROL,VPS8,-1.25760335145448,-4.06042519190256,-1.38746725579759,2.64295924119406 +EG12-40B-2DNMT,VPS8,-1.52821486840345,-0.440518561133357,-1.40810462120878,0.483988012936217 +NBP11-23_-7_,VPS8,1.67010239000304,0.24615191808354,-1.41543051294578,-0.0858728296396417 +EG12-32B-3CONTROL,VPS8,-0.829924332865037,-0.375908553734195,-1.42940849276362,-0.397447440192492 +contaminated_notRF1,VPS8,-1.34586310294461,-2.22526066992842,-1.43136909112777,-0.526614848234313 +PSC07-41-7.3_-7_,VPS8,3.88743127882949,0.845238701620387,-1.4369411473554,-0.732126913842386 +Ala_3,VPS8,-0.711912557693934,0.264532422118985,-1.43846713830715,-1.43210488626049 +Arg_3,VPS8,-0.809656057536296,0.45855410778758,-1.4400823210786,-2.40807390653115 +EG12-28D-2HDAC,VPS8,-5.13729596582444,-0.00466383457819225,-1.45419327779128,-0.710262758839263 +CQ10-27B-2BCONTROL,VPS8,1.87617768722953,-3.43813730532262,-1.45515736027886,2.89476330204027 
+KML12-15MG-E1HDAC,VPS8,-1.2778999485573,-3.84352644923827,-1.46303786296751,2.47249454332881 +PSC08-62_-7_,VPS8,0.500485947527555,0.631076360492122,-1.46738636959568,-1.23775195353027 +SCXIII93-532_-6_,VPS8,5.13531595549312,0.75386822461864,-1.47146243575637,-0.567461281515636 +Arg_1,VPS8,-0.166188384699631,0.560758482426424,-1.49025353049959,-2.35904445122201 +PSC07-4_-7__1,VPS8,0.658683285655803,0.610171564018303,-1.50551425085179,-0.760022724521459 +KML12-11_-6_,VPS8,4.32344195646088,0.725368620325255,-1.50915895293899,-1.50996290008732 +EG12-32B-2DNMT,VPS8,1.86830726850845,-2.17425077348186,-1.51669647253911,0.840405037181738 +KML12-49_1,VPS8,3.16917591554188,0.770069727439293,-1.52072956445941,-0.413738981051939 +CWR15-13_-6_,VPS8,0.395461728760323,0.934771585104098,-1.52463049031958,-0.287261443507201 +Met_3,VPS8,-1.12529616081171,-6.69214520201806,-1.52595377259017,8.77595035691072 +EG11-07C-2DNMT_2,VPS8,1.53570343404538,0.046789355033679,-1.52684406911781,-0.52625872306068 +PSC11-14_-6__4,VPS8,1.68689214457843,-0.115987758202209,-1.53299749384301,0.677523128731525 +Gly_4,VPS8,0.643309619784896,-0.329407660378009,-1.53535459038749,-0.623772308708597 +EG12-30A-2HDAC,VPS8,0.771262337661397,0.909507072880037,-1.54276480893586,-1.85278950833197 +Asn_1,VPS8,-0.0337234013155272,0.405582916071365,-1.57752853691949,-0.661216933549773 +Farnesol_4,VPS8,1.59354155419572,-0.0575366816885508,-1.58391710371483,-1.16413990335636 +CQ10-29B-4AHDAC,VPS8,0.570070523481489,-3.33860319804234,-1.59690424349917,2.75734140950871 +Gly_1,VPS8,-1.26187184740654,0.461515639658267,-1.60433959792399,-1.12773988788618 +Fk506_1,VPS8,1.40364203594205,-4.51448909042722,-1.61499072286669,2.92685153289488 +SC95-1675_-6_,VPS8,2.34248760567698,-5.19779198426479,-1.62022547184888,8.37455988102868 +KML12-19MG-C3CONTROL,VPS8,1.76799862512712,-4.14034874862291,-1.63741597884568,8.0124044184874 +KML12-48_-7_,VPS8,2.77002400390405,0.813006456938148,-1.63954198749365,0.316556536013066 
+Leucine_3,VPS8,0.370389878247629,-0.25685995258462,-1.64425212640901,-0.164731117474023 +EG12-27D-5DNMT,VPS8,3.18159197900431,-0.255389352351857,-1.65772171285451,0.459530580212867 +PSC07-79_-7__2,VPS8,3.27075195831372,-1.63703580401479,-1.67157158078585,2.17708835774154 +NBP13-9D_-7_,VPS8,0.110117831825183,0.695318460834994,-1.68097239858317,0.771730319390279 +Thr_3,VPS8,-0.854799953942968,-0.6300658527889,-1.68508414147175,-0.517576835774539 +EG12-40B-2HDAC,VPS8,-0.414398389835769,-5.52697604839817,-1.69748585858492,2.65204116987497 +Farnesol_3,VPS8,-1.33877962494644,-0.894999596127919,-1.69894941526286,-0.325964510587678 +KML12-19MG-C1HDAC,VPS8,-0.159918146214614,-2.72029360628515,-1.71013488028909,1.43427825364022 +EG10-65D-1HDAC,VPS8,0.224358765074664,0.181736772354056,-1.71700102873723,-0.111714548826222 +EG11-10A-1DNMT,VPS8,-1.75532312885907,-6.80062729777481,-1.73517913242863,8.79985462071478 +KML12-6MG-C3DNMT,VPS8,1.25980080280135,-3.37154361526385,-1.77693955325107,2.59236236026654 +NBP12-44_-6_,VPS8,5.89537223435291,-3.39601908621351,-2.0452090869021,2.59326475543325 +PSC07-83_-7_,VPS8,-0.124379377192004,-3.4307037780878,-2.07393452869144,3.33616447528418 +PSC11-12_-6__1,VPS8,2.55827828290808,0.832057554679732,-2.08971985911047,-0.893742273164176 +EG12-40A-4HDAC,VPS8,-4.71193730972648,-5.00995355863507,-2.10168372547234,8.66848321922938 +Ala_1,VPS8,-1.51545793216001,0.923769210935478,-2.1117542927522,-2.38814618378677 +Fk506_3,VPS8,2.08396139432876,-0.578409120682676,-2.11597711999805,0.576326638546426 +Leucine_4,VPS8,1.54124944430726,-0.265407709355018,-2.12980023038348,-0.729034351721526 +CQ10-27C-1CONTROL,VPS8,-1.40615307953754,-0.0566923572675338,-2.14635504295618,0.338288025967114 +Val_4,VPS8,0.969945651850531,-0.561537709484147,-2.15409932541109,-0.668299337339352 +KML12-19MG-C5DNMT,VPS8,-2.9733910463581,-4.97229328566068,-2.15799376459347,8.55141194060121 
+KML12-17MG-C1HDAC,VPS8,-2.45959259320615,-5.01590620571316,-2.19546568055736,8.56987011656804 +Uracil_1,VPS8,-0.297857323933447,-1.78645880339611,-2.25935459184958,0.137973955000139 +CQ10-29C-4DNMT,VPS8,-5.91413795329189,-0.0345005143142759,-2.38768945788985,0.325715540018117 +KML12-19MG-C1CONTROL,VPS8,-1.22364048686156,-1.02292137075183,-2.38805433351593,0.563397082401076 +EG12-25E-4BaCONTROL,VPS8,5.37825204169703,0.721129522349444,-2.42679601665859,-0.819265683403108 +CQ10-24B-2DNMT,VPS8,-1.99403724773979,-2.63594317513239,-2.42888878108284,0.816781450332396 +BGC13-11_-7_,VPS8,2.26703839950267,-0.211643592837572,-2.4360849392639,0.589287570487445 +KML12-19MG-C5HDAC,VPS8,-3.99218493837985,-0.303219325654329,-2.4422351206502,0.348758674647599 +EG12-32B-3HDAC,VPS8,-0.0493074575082387,-3.26807263977211,-2.50413910853653,0.551406753878228 +CC10-28A-1CONTROL,VPS8,1.89894030166199,-3.72061842197104,-2.57284924310138,2.96043318341504 +CQ10-29B-1HDAC,VPS8,-2.63436524962025,-5.40656102039961,-2.57925726993011,8.68718285240636 +Gly_3,VPS8,-3.18274560042737,-0.796712900509157,-2.58056271383676,0.975454383787573 +Val_3,VPS8,-0.479106563186654,-0.0129495675090599,-2.58642585973053,-1.5629293356703 +Asn_3,VPS8,-1.38647652698889,0.079496075317231,-2.61573348085216,-1.50470677223039 +EG12--44E-2CONTROL,VPS8,-0.515114664465612,-2.51174025585936,-2.66824557012314,0.464451464513913 +PSC08-41_-7_,VPS8,-0.595657744008293,-0.0899429997497973,-2.68460298467265,1.0820451235388 +Fk506_2,VPS8,2.70243611302342,-4.72038466286092,-2.92980787893206,8.20034543453983 +EG12-3B-5DNMT,VPS8,-0.759758149050337,0.133554426102607,-3.23359035185229,-0.378954136976154 +BGC11-10_-7_,VPS8,2.96277087194564,-3.52727201308505,-3.26514803933475,-2.66186896623727 +KML12-17MG-C1CONTROL,VPS8,-0.278233368965242,-6.20459867390617,-3.28359290765055,3.65529614095567 +14.317g,VPS8,1.88504037953566,0.622862244364823,-3.28937740257604,-0.276931985907223 
+EG12-27E-2CONTROL,VPS8,3.15339765289825,-2.16531020939492,-3.30878067753635,1.82190745185149 +CQ10-29B-1CONTROL,VPS8,-0.518035853164554,-2.20553741513009,-3.58666184601311,1.54460313221827 +EG12-35E-2DNMT,VPS8,-1.4012038495154,0.374674382930679,-3.64216753708742,-0.0319739872593032 +KML12-19MG-C5CONTROL,VPS8,-2.27855374427475,-0.3393096257977,-3.78002322446424,-0.719899902686996 +CQ10-29C-4HDAC,VPS8,-1.96255860733129,0.370201473795531,-3.87293272138557,-2.13354046901214 +SCXIVI93-638_-7_,VPS8,0.95333897731479,-5.87927361475361,-3.89817562803265,8.40089739610227 +CQ10-29C-4CONTROL,VPS8,0.335012947934112,0.784308906997897,-4.01122058367063,-3.58349169799806 +EG12-40A-4DNMT,VPS8,-1.68579519645332,0.290298937296287,-4.02835352140198,0.224717194803488 +EG11-10A-1CONTROL,VPS8,-0.43422363169839,0.524861330632072,-4.0906223849144,-0.237481015349567 +SC95-1624_-6_,VPS8,3.61372965402897,-3.98611402375268,-4.10746909798796,7.889393906364 +NBP13-46_-7_,VPS8,2.25260744411216,0.816739211548377,-4.11771399473362,-0.523971700389474 +EG11-10A-1HDAC,VPS8,1.12117785653664,-6.42086542968985,-4.29911961517014,2.99064731445993 +KML12-17MG-C1DNMT,VPS8,1.5870164197439,-6.997035296458,-4.56523232845366,8.96011093062638 +EG12-35E-2HDAC,VPS8,1.31270789978432,0.094811334471397,-5.4585224578969,0.0336607668659672 +CQ10-30A-4HDAC,VPS8,23.3944557567319,1.25783452911554,-10.617064327657,-0.488593444526053 +EG12-39B-1CONTROL,VPS8,25.3485438703162,-2.53052449671489,-13.9901457012852,0.502529665910034 +EG12-30A-2CONTROL,VPS8,10.4352332648449,0.0144430031955158,-18.4053378097147,-1.2514717022687 diff --git a/workflow/.old/templates/qhtcp/REMcRdy_lm_only.csv-WholeTree.txt b/workflow/.old/templates/qhtcp/REMcRdy_lm_only.csv-WholeTree.txt new file mode 100644 index 00000000..e74e5e03 --- /dev/null +++ b/workflow/.old/templates/qhtcp/REMcRdy_lm_only.csv-WholeTree.txt @@ -0,0 +1,3 @@ +root cluster is:0-0-0 +root cluster is:0-0-0 +root cluster is:0-0-0 diff --git 
a/workflow/.old/templates/qhtcp/mComponent.sh b/workflow/.old/templates/qhtcp/mComponent.sh new file mode 100644 index 00000000..7ee35b8c --- /dev/null +++ b/workflow/.old/templates/qhtcp/mComponent.sh @@ -0,0 +1,32 @@ +#This is a 'master associated' sh script to operate from the REMc source directory +#It is an altered Process.sh with a cd at start + +cd GTF/Component +#Begin the Original Component.sh +#!/bin/bash +# this one code will load the terms2tsv_v4.pl to add two more columns: one is the gene names, the other one is the cluster name column + +set $1 = ORF_List_Without_DAmPs.txt #ORF_List_Without_DAmPs.txt #ORFs_w_DAmP_list.txt +set $2 = REMcRdy_lm_only/*.txt +#paths= ORF_List_Without_DAmPs.txt:REMcRdy_lm_only/*.txt +#if [ $argv -lt 2 ]; then + +#echo "Usage: ./example_v4.sh backgroundFilePath geneListsFolder" + +#else + for i in ${*:2} + do + #echo $1 + #echo $i + ./analyze_v2.pl -an gene_association.sgd -as C -o gene_ontology_edit.obo -b $1 $i + ./terms2tsv_v4.pl $i.terms > $i.tsv + done +#fi +wait +echo "Component complete" +#Concatinatethe Process ontology outputs from the /REMcRdy_lm_only folder +python3 Concatenate_GTF_results.py REMcRdy_lm_only/ ComponentResults.txt +wait +echo "Component Concatenate complete" +pwd +#exit diff --git a/workflow/.old/templates/qhtcp/mFunction.sh b/workflow/.old/templates/qhtcp/mFunction.sh new file mode 100644 index 00000000..671ed258 --- /dev/null +++ b/workflow/.old/templates/qhtcp/mFunction.sh @@ -0,0 +1,32 @@ +#This is a 'master associated' sh script to operate from the /REMc source directory +#It is an altered Function.sh with a cd at start + +cd GTF/Function +#Begin the Original Function.sh +#!/bin/bash +# this one code will load the terms2tsv_v4.pl to add two more columns: one is the gene names, the other one is the cluster name column + +set $1 = ORF_List_Without_DAmPs.txt #ORF_List_Without_DAmPs.txt #ORFs_w_DAmP_list.txt +set $2 = REMcRdy_lm_only/*.txt + +#if [ $argv -lt 2 ]; then +#echo "Usage: 
./example_v4.sh backgroundFilePath geneListsFolder" +#else + + + for i in ${*:2} + do + #echo $1 + #echo $i + ./analyze_v2.pl -an gene_association.sgd -as F -o gene_ontology_edit.obo -b $1 $i + ./terms2tsv_v4.pl $i.terms > $i.tsv + done +wait +echo "Function complete" +#fi +#Concatinatethe Process ontology outputs from the /REMcRdy_lm_only folder +python3 Concatenate_GTF_results.py REMcRdy_lm_only/ FuctionResults.txt +wait +echo "FunctionConcatenate complete" +pwd +#exit diff --git a/workflow/.old/templates/qhtcp/mProcess.sh b/workflow/.old/templates/qhtcp/mProcess.sh new file mode 100644 index 00000000..30a0bf55 --- /dev/null +++ b/workflow/.old/templates/qhtcp/mProcess.sh @@ -0,0 +1,37 @@ +#This is a 'master associated' sh script to operate from the REMc source directory +#It is an altered Process.sh with a cd at start + +cd GTF/Process +#Begin the Original Process.sh +#!/bin/bash +# this one code will load the terms2tsv_v4.pl to add two more columns: one is the gene names, the other one is the cluster name column + +set $1 = ORF_List_Without_DAmPs.txt #ORFs_w_DAmP_list.txt #ORF_List_Without_DAmPs.txt +set $2 = REMcRdy_lm_only/*.txt +#paths= ORF_List_Without_DAmPs.txt:REMcRdy_lm_only/*.txt +#if [ $argv -lt 2 ]; then + +#echo "Usage: ./example_v4.sh backgroundFilePath geneListsFolder" + +#else + for i in ${*:2} + do + #echo $1 + #echo $i + ./analyze_v2.pl -an gene_association.sgd -as P -o gene_ontology_edit.obo -b $1 $i + ./terms2tsv_v4.pl $i.terms > $i.tsv + done +#fi +wait +echo "Process complete" +#Concatinatethe Process ontology outputs from the /REMcReady_lm_only folder +#python2 Concatenate_GTF_results.py ./REMcReady_lm_only/ ProcessResultsTest.txt +pwd +python3 Concatenate_GTF_results.py REMcRdy_lm_only/ ProcessResults.txt +#python2 ConcatGTFdebug.py REMcRdy_lm_only/ ProcessResults.txt +#python2 ConcatGTFdebug.py REMcRdy_lm_only/ PresDebug4.txt + +wait +echo "Process Concatenate complete" +pwd +#exit diff --git a/workflow/templates/easy/DMPexcel2mat.m 
b/workflow/apps/easy/DMPexcel2mat.m similarity index 93% rename from workflow/templates/easy/DMPexcel2mat.m rename to workflow/apps/easy/DMPexcel2mat.m index 4fddde53..238b02ef 100755 --- a/workflow/templates/easy/DMPexcel2mat.m +++ b/workflow/apps/easy/DMPexcel2mat.m @@ -7,7 +7,7 @@ global matDir % If we already have mpdmFile, don't recreate if (exist(mpdmFile, 'file') && ~isempty(mpdmFile)) - disp(sprintf('The Drug Media/MasterPlate Annotation File: %s exists, skipping DMPexcel2mat.m\n', mpdmFile)); + fprintf('The Drug Media/MasterPlate Annotation File: %s exists, skipping DMPexcel2mat.m\n', mpdmFile); return end @@ -82,7 +82,7 @@ if ~exist(masterPlateFile, 'file') || isempty(masterPlateFile) end end else - disp(sprintf('Using MasterPlate file: %s skipping directory selection\n', masterPlateFile)); + fprintf('Using MasterPlate file: %s skipping directory selection\n', masterPlateFile); end % fid=fopen(masterPlateFile)%('exp23PrintTimes.xls'); % textread puts date and time sequentially into vector @@ -175,7 +175,7 @@ numOfMedias=0; % find a matching DrugMedia file [mpFile, mpPath]=fullfile(masterPlateFile); mpFileParts=strsplit(mpFile, '_'); -mpBareFileName=strjoin(parts(2:end-1), '_'); +mpBareFileName=strjoin(mpFileParts(2:end-1), '_'); if ~exist(drugMediaFile, 'file') || isempty(drugMediaFile) if exist(fullfile(matDir), 'dir') @@ -183,7 +183,7 @@ if ~exist(drugMediaFile, 'file') || isempty(drugMediaFile) dmFileToTest=fullfile(mpPath, 'DrugMedia_', mpBareFileName, '.xlsx'); if exist(dmFileToTest, 'file') % Try to find a matching drug media file drugMediaFile=dmFileToTest; - disp(sprintf('Using matching DrugMedia file: %s, skipping directory selection\n', drugMediaFile)); + fprintf('Using matching DrugMedia file: %s, skipping directory selection\n', drugMediaFile); else % Try to find the DrugMedia file automatically (newest created first) files=dir(matDir); @@ -195,7 +195,7 @@ if ~exist(drugMediaFile, 'file') || isempty(drugMediaFile) [~, 
sortedIndices]=sort(datenum({files(strncmp(dmFiles.name, 'DrugMedia_', 10)).date}), 'descend'); sortedFiles=dmFiles{sortedIndices}; drugMediaFile=sortedFiles{1}; - disp(sprintf('Using newest DrugMedia file: %s, skipping directory selection\n', drugMediaFile)); + fprintf('Using newest DrugMedia file: %s, skipping directory selection\n', drugMediaFile); end catch Me @@ -229,7 +229,7 @@ if ~exist(drugMediaFile, 'file') || isempty(drugMediaFile) end end else - disp(sprintf('Using drugMediaFile: %s, skipping directory selection\n', drugMediaFile)); + fprintf('Using drugMediaFile: %s, skipping directory selection\n', drugMediaFile); end % Drug and Media Plate setup diff --git a/workflow/templates/easy/DgenResults.m b/workflow/apps/easy/DgenResults.m similarity index 96% rename from workflow/templates/easy/DgenResults.m rename to workflow/apps/easy/DgenResults.m index ba1e42a7..403bcb16 100755 --- a/workflow/templates/easy/DgenResults.m +++ b/workflow/apps/easy/DgenResults.m @@ -366,7 +366,7 @@ if isequal(opt,'DB')||isequal(opt,'Both') try copyfile(DBfilename,DBupload) catch ME - disp(sprintf('DB upload failed with error: %s\n', getReport(ME, 'basic'))); + fprintf('DB upload failed with error: %s\n', getReport(ME, 'basic')); rep=sprintf('Failed copyfile to %s - %s', DBupload, rep); errordlg(rep); end @@ -654,7 +654,7 @@ try try copyfile(DBfilename,DBupload) catch ME - disp(sprintf('DB upload failed with error: %s\n', getReport(ME, 'basic'))); + fsprintf('DB upload failed with error: %s\n', getReport(ME, 'basic')); rep=sprintf('Failed copyfile to %s - %s\n', DBupload, rep); errordlg(rep) end @@ -663,6 +663,6 @@ try msgbox([sprintf('Printing Script complete. 
Check !!Results sheets in %s for results.', printResultsDir)]) catch ME - disp(sprintf('Printing Script failed with error: %s\n', getReport(ME, 'basic'))); + fprintf('Printing Script failed with error: %s\n', getReport(ME, 'basic')); end diff --git a/workflow/templates/easy/EASYconsole.fig b/workflow/apps/easy/EASYconsole.fig similarity index 100% rename from workflow/templates/easy/EASYconsole.fig rename to workflow/apps/easy/EASYconsole.fig diff --git a/workflow/templates/easy/EASYconsole.m b/workflow/apps/easy/EASYconsole.m similarity index 74% rename from workflow/templates/easy/EASYconsole.m rename to workflow/apps/easy/EASYconsole.m index 4c47c498..d00d8291 100644 --- a/workflow/templates/easy/EASYconsole.m +++ b/workflow/apps/easy/EASYconsole.m @@ -1,6 +1,5 @@ % Launch the MATLAB EASY console -% -% Updated 240724 Bryan C Roessler to improve file operations and portability +% Updated 240727 Bryan C Roessler to improve file operations and portability % function varargout = EASYconsole(varargin) global easyDir @@ -27,122 +26,134 @@ function varargout = EASYconsole(varargin) [easyDir,easyFileName]=fileparts(easyPath); easyDir=fullfile(easyDir); [parentDir, ~]=fileparts(easyDir); - userName=system('whoami'); - todayStr=datetime('now', 'Format', 'yyyyMMdd'); % This should match the parent workflow script + parentDir=fullfile(parentDir); % ../easy/apps + userName=getenv('USER'); + dt=datetime; + todayStr=char(dt, 'yyyyMMdd'); % This should match the parent workflow script - disp(sprintf('This script name: %s\n', easyFileName)) + demo=1; + if demo + disp('Running in demo mode'); + disp('Initialized variables:'); + whos; + end - % Set scansDir intelligently (project scans directory) - % Allow users to specify a PROJECT=/mnt/data/ExpJobs/Job directory to analyze with EASY - % This better enables running the new EASY in standalone mode + fprintf('This script name: %s\n', easyFileName); + + % Set scansDir (project scans directory) intelligently if exist('PROJECT', 
'var') && ~isempty(getenv('PROJECT')) - scansDir=getenv('PROJECT') - disp(sprintf('Using project path: %s from environment variable PROJECT\n', scansDir)); + scansDir=getenv('PROJECT'); + fprintf('Using project path: %s from environment variable PROJECT\n', scansDir); disp('This usually indicates that we are in standalone mode'); elseif exist('SCANS_DIR', 'var') && ~isempty(getenv('SCANS_DIR')) scansDir=getenv('SCANS_DIR'); - disp(sprintf('Using scans directory: %s from environment variable SCANS_DIR\n', scansDir)); + fprintf('Using scans directory: %s from environment variable SCANS_DIR\n', scansDir); disp('This usually indicates that we are in module mode'); else - dirToScan = fullfile(parentDir,'ExpJobs'); % hardcoded relative to easy script dir TODO: if we change pj layout this will change + dirToScan=fullfile(parentDir,'..','ExpJobs'); % hardcoded relative to easy script dir TODO: if we change pj layout this will change if exist(dirToScan, 'dir') dirs=dir(fullfile(dirToScan, 'dir')); % filter for dirs only [~, sortedIndices]=sort(datenum({dirs.date}), 'descend'); % sort by newest first sortedDirs=dirs{sortedIndices}; scansDir=sortedDirs{1}; disp('Beginning in newest project scans directory'); - disp(sprintf('Using scans directory: %s from hardcoded relative path\n', scansDir)); + fprintf('Using scans directory: %s from hardcoded relative path\n', scansDir); disp('This usually indicates that we are in stand-alone mode without PROJECT or SCANS_DIR environment variables'); else - scansDir=fullfile('/mnt/data/ExpJobs/demo') + scansDir=fullfile(parentDir, '..', 'demo', '20240727_hartmanlab_demo_project'); if exist(scansDir, 'dir') - disp(sprintf('Using scans directory: %s from hardcoded absolute path to demo\n', scansDir)); + demo=1; + fprintf('Using scans directory: %s from hardcoded absolute path to demo project\n', scansDir); else - disp(sprintf('Error: scansDir %s does not exist\n', scansDir)); + fprintf('Error: demo project %s not found\n', scansDir); + 
disp('Attempting to continue but this may get ugly'); end end end - % If we don't have the EASY_SUFFIX from the module, generate it from scansDir - if exist('EASY_SUFFIX', 'var') && ~isempty(getenv('EASY_SUFFIX')) - easySuffix=get_env('EASY_SUFFIX'); - else - [dirName, ~]=fileparts(scansDir); - [scansDate, scansUserName, easySuffix]=strsplit(dirName, '_'); - % The following is handled by the workflow script that calls this module - % Reimplementing here for stand-alone mode - % While the easySuffix directory exists, increment by one and try again - if exist(easySuffix, 'dir') - oldSuffix=easySuffix; - while exist(easySuffix, 'dir') - count=1; - easySuffix=strcat(oldSuffix,'.',num2str(count)); - end - end - % Might as well check this too - if userName ~= scansUserName - disp('WARNING: userName does not match scansUserName'); - disp("This usually means that you are attempting to run an EASY analysis on another user's project data scans"); - end - % For happiness - if todayStr == scansDate - disp("Early bird gets the worm"); - end - end - - % Helpful variables for running in workflow mode that I'll probably have to reimplement in matlab anyways for standalone mode - if exist('EASY_DIR','var') && ~isempty(getenv('EASY_DIR')) - EASY_DIR=fullfile(get_env('EASY_DIR')); - if easyDir ~= EASY_DIR % sanity check - disp("WARNING: EASY_DIR does not match this script's hardcoded EASY location"); - disp("This is probably OK but if strange beahvior arises, we'll need to fix it in code"); - easyDir=EASY_DIR; - end - disp(sprintf('Using EASY script directory: %s from environment variable EASY_DIR\n', easyDir)); - else - disp(sprintf('Using EASY script directory: %s from hardcoded default\n', easyDir)); - end - + % Sanity check and warning if exist('PROJECT_USER', 'var') && ~isempty(getenv('PROJECT_USER')) - if get_env('PROJECT_USER') ~= userName % sanity check + if ~equal(getenv('PROJECT_USER'), userName) disp("WARNING: PROJECT_USER does not match the current namespace"); end end - 
if exist('EASY_RESULTS_DIR', 'var') && ~isempty(getenv('EASY_RESULTS_DIR')) - easyResultsDir=fullfile(get_env('EASY_RESULTS_DIR')); - disp(sprintf('Using output directory: %s from environment variable EASY_RESULTS_DIR\n', easyResultsDir)) + % Allow module to override hardcoded default EASY directory + if exist('EASY_DIR','var') && ~isempty(getenv('EASY_DIR')) + EASY_DIR=fullfile(getenv('EASY_DIR')); + if ~strcmp(easyDir, EASY_DIR) % sanity check + disp("WARNING: EASY_DIR does not match this script's hardcoded EASY location"); + disp("This is probably OK but if strange beahvior arises, we'll need to fix it in code"); + easyDir=EASY_DIR; + end + fprintf('Using EASY script directory: %s from environment variable EASY_DIR\n', easyDir); + else + fprintf('Using EASY script directory: %s from hardcoded default\n', easyDir); + end + + % If we don't have tan EASY_SUFFIX from the module, generate it from scansDir + if exist('EASY_SUFFIX', 'var') && ~isempty(getenv('EASY_SUFFIX')) + easySuffix=getenv('EASY_SUFFIX'); + else + % The following is a way to parse the project name from the scansDir + [ ~, dirName]=fileparts(scansDir); + parts=strsplit(dirName, '_'); + scansDate=parts{1}; + scansUserName=parts{2}; + easySuffix=strjoin(parts(3:end), '_'); + % Might as well check this too for fun + if ~strcmp(userName, scansUserName) + disp('WARNING: userName does not match scansUserName'); + disp("This usually means that you are attempting to run an EASY analysis on another user's project data scans"); + end + % For happiness + if strcmp(todayStr, scansDate) + disp("Early bird gets the worm"); + end + end + + if (exist('EASY_RESULTS_DIR', 'var') && ~isempty(getenv('EASY_RESULTS_DIR'))) + easyResultsDir=fullfile(getenv('EASY_RESULTS_DIR')); + if exist(easyResultsDir, 'dir') + fprintf('WARNING: EASY results dir %s already exists\n', easyResultsDir); + disp('Files in this directory may be overwritten'); + end + fprintf('Using output directory: %s from environment variable 
EASY_RESULTS_DIR\n', easyResultsDir); else easyResultsDirName=strcat('Results_',todayStr,'_',userName,'_',easySuffix); easyResultsDir=fullfile(scansDir,easyResultsDirName); - disp(sprintf('Using output directory: %s\n', PROJECT_PREFIX)) + if exist(easyResultsDir, 'dir') + fprintf('WARNING: EASY results dir %s already exists\n', easyResultsDir); + disp('Files in this directory may be overwritten') + fprintf('Using output directory: %s\n', easyResultsDir); + end end if exist('MASTER_PLATE_FILE', 'var') && ~isempty(getenv('MASTER_PLATE_FILE')) - masterPlateFile=fullfile(get_env('MASTER_PLATE_FILE')); - disp(sprintf('Using drug media file: %s from environment variable MASTER_PLATE_FILE\n', masterPlateFile)) + masterPlateFile=fullfile(getenv('MASTER_PLATE_FILE')); + fprintf('Using drug media file: %s from environment variable MASTER_PLATE_FILE\n', masterPlateFile); else % Try to find MasterPlate_ file on our own mp=fullfile(scansDir,'MasterPlateFiles',strcat('MasterPlate_', easySuffix,'.xlsx')); if exist(mp, 'file') masterPlateFile=mp; - disp(sprintf('Using drug media file: %s from internal logic\n', masterPlateFile)) + fprintf('Using drug media file: %s from internal logic\n', masterPlateFile); else - disp("WARNING: Have you created a MasterPlate_ file?") + fprintf('WARNING: Have you created a MasterPlate_ file in %s/MasterPlateFiles/?\n', scansDir); end end if exist('DRUG_MEDIA_FILE', 'var') && ~isempty(getenv('DRUG_MEDIA_FILE')) - drugMediaFile=fullfile(get_env('DRUG_MEDIA_FILE')); - disp(sprintf('Using drug media file: %s from environment variable DRUG_MEDIA_FILE\n', drugMediaFile)) + drugMediaFile=fullfile(getenv('DRUG_MEDIA_FILE')); + fprintf('Using drug media file: %s from environment variable DRUG_MEDIA_FILE\n', drugMediaFile); else % Try to find MasterPlate_ file on our own dm=fullfile(scansDir,'MasterPlateFiles',strcat('DrugMedia_', easySuffix,'.xlsx')); if exist(mp, 'file') drugMediaFile=dm; - disp(sprintf('Using drug media file: %s from internal logic\n', 
drugMediaFile)) + fprintf('Using drug media file: %s from internal logic\n', drugMediaFile); else - disp("WARNING: Have you created a DrugMedia_ file?") + fprintf('WARNING: Have you created a DrugMedia_ file in %s/MasterPlateFiles/?\n', scansDir); end end @@ -161,6 +172,12 @@ function varargout = EASYconsole(varargin) searchRangeFile=fullfile(fotosResultsDir,'CSearchRange.mat'); mpdmFile=fullfile(matDir,'MPDM.mat'); + % Decent time to print some helpful vars + if demo + disp('Vars at end of main loop:') + whos; + end + % This can be removed, I think it should add the previous search range? % Might be nice feature but can remove if it causes issues % We are using searchRangeNum to hold old CSrchRange value(s) @@ -172,14 +189,6 @@ function varargout = EASYconsole(varargin) % reduce directory scoping issues when calling scripts w/o a path addpath(easyDir); - % Pulled this out of the opening function - % Seems better to wait until we have our ars set - if exist('scansDir','var') && ~isempty(scansDir) - set(fhconsole,'Name',sprintf('EASYconsole - %s', scansDir)); - else - set(fhconsole,'Name','EASYconsole - No Active Experiment.') - end - % GUI interface design gui_Singleton=1; gui_State=struct( 'gui_Name', mfilename, ... @@ -221,6 +230,7 @@ end % varargin--input arguments to EASYconsole (see VARARGIN) function EASYconsole_OpeningFcn(hObject, ~, handles, varargin) global fhconsole + global scansDir % Choose default command line output for EASYconsole handles.output=hObject; @@ -232,6 +242,15 @@ function EASYconsole_OpeningFcn(hObject, ~, handles, varargin) fhconsole=gcf; set(fhconsole,'Toolbar','none'); fhconsole=gcf; + + % Pulled this out of the opening function + % Seems better to wait until we have our vars set though? 
+ if exist('scansDir','var') && ~isempty(scansDir) + set(fhconsole,'Name', sprintf('EASYconsole - %s', scansDir)); + else + set(fhconsole,'Name','EASYconsole - No Active Experiment.') + end + end @@ -319,7 +338,7 @@ function NewExpDat_Callback(~, ~, ~) sbdg= cell(1,scanMax); save((fullfile(easyResultsDir,'Fotos','Nbdg')),'sbdg'); catch ME - disp(sprintf('ERROR: %s\n', ME.message)); + fprintf('ERROR: %s\n', ME.message); end % set the title for fhconsole depending on existence @@ -340,7 +359,7 @@ function LoadDatFile_Callback(~, ~, ~) global fhconsole try - questdlg('\fontsize{20} Load file from ExpJobs/YourJob/YourResults/matResults','File Creation','OK', struct('Default','OK','Interpreter','tex')); + questdlg('Load results .mat from ../ExpJobs/YourJob/Results/matResults/','File Creation','OK', struct('Default','OK','Interpreter','tex')); [inputFile,inputPath]=uigetfile('.mat','Open Experiment folder and data storage .mat file name','MultiSelect','off'); matDir=fullfile(inputPath); matFile=fullfile(inputPath,inputFile); @@ -368,9 +387,11 @@ function LoadDatFile_Callback(~, ~, ~) try load(fullfile(easyPath,'ImParameters.mat')); catch - disp("Could not load the ImParameters.mat file") + disp("Could not load the ImParameters.mat file"); end - end + end + else + disp('WARNING: cannot find project scans'); end bkupDir=fullfile(matDir,'BkUp'); @@ -395,7 +416,7 @@ function LoadDatFile_Callback(~, ~, ~) fhconsole=gcf; set(fhconsole,'Name',sprintf('EASYconsole - %s', easyResultsDir)); else - set(fhconsole,'Name','EASYconsole -Exp. Analysis NOT selected.'); + set(fhconsole,'Name','EASYconsole - Exp. 
Analysis NOT selected.'); end end @@ -415,6 +436,7 @@ end function NImCFcombo_Callback(~, ~, ~) try par4Gbl_Main8c + EASYconsole catch EASYconsole end @@ -433,6 +455,7 @@ function PlateCFit_Callback(~, ~, ~) try NCstart catch + EASYconsole end end @@ -455,7 +478,7 @@ function runResults_DBcombo_Callback(~, ~, ~) DgenResults %similar but semicolons removed to restore so cmdLine display info. %Dgen241010qhtcp %par4global -convert 1x1cell of 384cells to be like previous 1x384 cells CFparameter catch ME - disp(sprintf('Error in DgenResults: %s\n', ME.message)); + fprintf('Error in DgenResults: %s\n', ME.message); EASYconsole end end diff --git a/workflow/templates/easy/NCdisplayGui.m b/workflow/apps/easy/NCdisplayGui.m similarity index 100% rename from workflow/templates/easy/NCdisplayGui.m rename to workflow/apps/easy/NCdisplayGui.m diff --git a/workflow/templates/easy/NCfitImCFparforFailGbl2.m b/workflow/apps/easy/NCfitImCFparforFailGbl2.m similarity index 100% rename from workflow/templates/easy/NCfitImCFparforFailGbl2.m rename to workflow/apps/easy/NCfitImCFparforFailGbl2.m diff --git a/workflow/templates/easy/NCscurImCF_3parfor.m b/workflow/apps/easy/NCscurImCF_3parfor.m similarity index 100% rename from workflow/templates/easy/NCscurImCF_3parfor.m rename to workflow/apps/easy/NCscurImCF_3parfor.m diff --git a/workflow/templates/easy/NCsingleDisplay.m b/workflow/apps/easy/NCsingleDisplay.m similarity index 100% rename from workflow/templates/easy/NCsingleDisplay.m rename to workflow/apps/easy/NCsingleDisplay.m diff --git a/workflow/templates/easy/NIcircle.m b/workflow/apps/easy/NIcircle.m similarity index 100% rename from workflow/templates/easy/NIcircle.m rename to workflow/apps/easy/NIcircle.m diff --git a/workflow/templates/easy/NImParamRadiusGui.m b/workflow/apps/easy/NImParamRadiusGui.m similarity index 94% rename from workflow/templates/easy/NImParamRadiusGui.m rename to workflow/apps/easy/NImParamRadiusGui.m index a096e980..37b4958a 100755 --- 
a/workflow/templates/easy/NImParamRadiusGui.m +++ b/workflow/apps/easy/NImParamRadiusGui.m @@ -41,7 +41,7 @@ function NImParamRadiusGui(scansDir) if exist(pointMapsFile, 'file') load(pointMapsFile); else - load(fullfile(PTmats,'NImParameters')) % hardcoded default + load(fullfile(easyDir, 'PTmats', 'NImParameters')) % hardcoded default disp('WARNING: Using hardcoded NImParameters.mat') end ImParMat; @@ -67,7 +67,7 @@ function NImParamRadiusGui(scansDir) set(fhImParm,'Name',strcat('ImageAnalysis- ',char(easyResultsDir))) else set(fhImParm,'NumberTitle','off') - set(fhImParm,'Name','EASYconsole -Exp. Analysis NOT selected.') + set(fhImParm,'Name','EASYconsole - Exp. Analysis NOT selected.') end btnNumber=5; diff --git a/workflow/templates/easy/NIscanIntensBGpar4GblFnc.m b/workflow/apps/easy/NIscanIntensBGpar4GblFnc.m similarity index 97% rename from workflow/templates/easy/NIscanIntensBGpar4GblFnc.m rename to workflow/apps/easy/NIscanIntensBGpar4GblFnc.m index 16fa386c..b4263689 100755 --- a/workflow/templates/easy/NIscanIntensBGpar4GblFnc.m +++ b/workflow/apps/easy/NIscanIntensBGpar4GblFnc.m @@ -1184,7 +1184,7 @@ function [Tmpsbdg2, scanIntens, F_spots, bmtp, optomizedPos, TmpexpScanIntens2, title(strcat('Scan',num2str(selScan),'timePt-',num2str(tPt))); clf(hfscanIm,'reset'); set(hfIm,'NumberTitle','off') - set(hfIm,'Name', char(matFile)) % strcat('EASYconsole- ',char(easyResultsDir))) + set(hfIm,'Name', char(matFile)) if Fflg==1 fullfile(fotosResultsDir,strcat('Scan',num2str(selScan),'_timePt-',num2str(tPt))) hgsave(fullfile(fotosResultsDir,strcat('Scan',num2str(selScan),'_timePt-',num2str(tPt)))); %F 14_0626 diff --git a/workflow/templates/easy/PTmats/NCFparms.mat b/workflow/apps/easy/PTmats/NCFparms.mat similarity index 100% rename from workflow/templates/easy/PTmats/NCFparms.mat rename to workflow/apps/easy/PTmats/NCFparms.mat diff --git a/workflow/templates/easy/PTmats/NImParameters.mat b/workflow/apps/easy/PTmats/NImParameters.mat similarity index 100% rename 
from workflow/templates/easy/PTmats/NImParameters.mat rename to workflow/apps/easy/PTmats/NImParameters.mat diff --git a/workflow/templates/easy/PTmats/NPTdirectParameters.mat b/workflow/apps/easy/PTmats/NPTdirectParameters.mat similarity index 100% rename from workflow/templates/easy/PTmats/NPTdirectParameters.mat rename to workflow/apps/easy/PTmats/NPTdirectParameters.mat diff --git a/workflow/templates/easy/PTmats/NPTmapDirect.mat b/workflow/apps/easy/PTmats/NPTmapDirect.mat similarity index 100% rename from workflow/templates/easy/PTmats/NPTmapDirect.mat rename to workflow/apps/easy/PTmats/NPTmapDirect.mat diff --git a/workflow/templates/easy/PTmats/NPTmapSearch.mat b/workflow/apps/easy/PTmats/NPTmapSearch.mat similarity index 100% rename from workflow/templates/easy/PTmats/NPTmapSearch.mat rename to workflow/apps/easy/PTmats/NPTmapSearch.mat diff --git a/workflow/templates/easy/PTmats/NPTsearchParameters.mat b/workflow/apps/easy/PTmats/NPTsearchParameters.mat similarity index 100% rename from workflow/templates/easy/PTmats/NPTsearchParameters.mat rename to workflow/apps/easy/PTmats/NPTsearchParameters.mat diff --git a/workflow/templates/easy/PTmats/Nbdg.mat b/workflow/apps/easy/PTmats/Nbdg.mat similarity index 100% rename from workflow/templates/easy/PTmats/Nbdg.mat rename to workflow/apps/easy/PTmats/Nbdg.mat diff --git a/workflow/templates/easy/datatipp.m b/workflow/apps/easy/datatipp.m similarity index 100% rename from workflow/templates/easy/datatipp.m rename to workflow/apps/easy/datatipp.m diff --git a/workflow/templates/easy/figs/NPTdirect.fig b/workflow/apps/easy/figs/NPTdirect.fig similarity index 100% rename from workflow/templates/easy/figs/NPTdirect.fig rename to workflow/apps/easy/figs/NPTdirect.fig diff --git a/workflow/templates/easy/figs/searchNPTIm.fig b/workflow/apps/easy/figs/searchNPTIm.fig similarity index 100% rename from workflow/templates/easy/figs/searchNPTIm.fig rename to workflow/apps/easy/figs/searchNPTIm.fig diff --git 
a/workflow/templates/easy/p4loop8c.m b/workflow/apps/easy/p4loop8c.m similarity index 100% rename from workflow/templates/easy/p4loop8c.m rename to workflow/apps/easy/p4loop8c.m diff --git a/workflow/templates/easy/par4GblFnc8c.m b/workflow/apps/easy/par4GblFnc8c.m similarity index 100% rename from workflow/templates/easy/par4GblFnc8c.m rename to workflow/apps/easy/par4GblFnc8c.m diff --git a/workflow/templates/easy/par4Gbl_Main8c.m b/workflow/apps/easy/par4Gbl_Main8c.m similarity index 96% rename from workflow/templates/easy/par4Gbl_Main8c.m rename to workflow/apps/easy/par4Gbl_Main8c.m index ddcf85f9..a3e4d128 100755 --- a/workflow/templates/easy/par4Gbl_Main8c.m +++ b/workflow/apps/easy/par4Gbl_Main8c.m @@ -244,4 +244,3 @@ for scanCnt=1:numScans fclose(fid); end -EASYconsole; diff --git a/workflow/docs/.~lock.EASY Manual 17_0614.odt# b/workflow/docs/.~lock.EASY Manual 17_0614.odt# new file mode 100644 index 00000000..66d6c281 --- /dev/null +++ b/workflow/docs/.~lock.EASY Manual 17_0614.odt# @@ -0,0 +1 @@ +,bryan,workstation,24.07.2024 21:41,file:///home/bryan/.config/libreoffice/4; \ No newline at end of file diff --git a/workflow/docs/20_0329_SS_Q_HTCP_Analysis_Readme.md b/workflow/docs/20_0329_SS_Q_HTCP_Analysis_Readme.md new file mode 100644 index 00000000..c2a8e79b --- /dev/null +++ b/workflow/docs/20_0329_SS_Q_HTCP_Analysis_Readme.md @@ -0,0 +1,491 @@ +**Readme for Q-HTCP analysis using current (2/17/20) practices-** + +**![][image1]** + +**Figure 1\. Flow diagram for Q-HTCP analysis.** Sections describing steps in current Q-HTCP analysis are indicated numerically. + +**\*\*\* WHAT FOLLOWS IS OUT OF DATE- WE ARE CURRENTLY USING A SET OF SHELL SCRIPT THAT CALLS THE Rscripts DESCRIBED BELOW AND PRODUCES THE DIRECTORIES NEEDED TO LOCATE THE INPUT FILES AND TO RECEIVE THE OUTPUTS. 
SEE GOOGLE DOC “Copy of Multi-experiment Study Analysis System Guide\_jh24\_0718”** + +**\*\*\*Highlighted words indicate places in your code/script that you will need to modify the name to match the files, path, or description of your particular experiment\*\*\*** + +**It may also be possible to implement Rprojects in order to avoid the need to change file paths. The advantage of Rprojects is that the folder containing the files is the top of the directory, thus the same directory structure can be used on different computers to call and write files from the Rscripts. [seansantos18@gmail.com](mailto:seansantos18@gmail.com)**is this description correct? could this work on the server just as well as it works from our laptops?** + +[**seansantos18@gmail.com**](mailto:seansantos18@gmail.com)**also wanted to ask if it is necessary to use FileZilla to move files around on server from Mac, whereas it is possible to use keyboard shortcuts (cont-C / cont-V) on PC, because there is not “middle click” on a Mac?** + +Prior to performing interaction score analysis, use John Rodgers Matlab software to generate the \!\!results.txt file. Once the \!\!results.txt file is generated view the experiment using John Rodgers’ software; open Matlab 2014 and the most current version of EZView. Run the EZView.mat program and click “Add to path” when prompted. Navigate to /media/data/ExpJobs/YourJobName/Results-date/matResults/date\_reg.mat to view your plates. Look for plate contamination, plate effects (such as much faster/slower growth than normal, or areas with no growth, gradient from the middle – likely did not mix drug well if seeing this), missing image time points, dropped plates, darker images, and curve fits that do not match visual spot growth. When satisfied with the image results and the Matlab analysis, move on to generating the interaction scores. 
+ +**1.0 Generate interaction scores for genome wide experiments from a \!\!results.txt** + +1) Create folder on the server for the new experiment (On server, for most experiments, I’ve used the path /media/data/Santos\_Sean/Q\_HTCP\_Analysis/**Folder**/) + It is also possible to create a new directory using the command-line tool “mkdir” + Example: + +![][image2] + +1) Copy \!\!results.txt (results sheet from matlab) to folder (add path to \!\!results.txt files) +1) Copy lm\_interaction\_drug\_all\_V5\_do\_not\_print\_avg\_Z.R (interaction score Rscript) to folder (this is located in the Q\_HTCP\_Analysis Folder) +1) Open the terminal within /media/data/Santos\_Sean/Q\_HTCP\_Analysis/**Folder**/ path or use command line to navigate to your new folder using “cd” + Example: cd /media/data/Santos\_Sean/Q\_HTCP\_Analysis/Folder/![][image3] + +1) Run the interaction scores Rscript from the command line using the following: + Rscript lm\_interaction\_drug\_all\_V5\_do\_not\_print\_avg\_Z.R \!\!results.txt ExperimentName\_Analysis1/ + ![][image4] + + ***Modified scripts** can be used to exclude certain data types –* an example we performed was using the JS\_19\_1224 P53 data where the \!\!results.txt contained both WT and 2KR data. We wanted to include only the 2KR data and ran the following modified Rscript:![][image5] + In the above example we used a slightly modified version of the Rscript where we select a subset of the data from \!\!results.txt using the modifications as seen below: + ![][image6] + In the added line of the above R script (last line, shown as opened in the gedit text editor), we subset our data.frame using the specifics column and specify to only include rows where “2KR” is in the specifics column. + + The interaction scores Rscript will generate several files as seen below: + ![][image7] + + The **ZScores\_Interaction.csv** file is the primary output containing the interaction scores and is used as the input file for most of the downstream analysis.
+ ![][image8] + + + + + +1) After interaction scores are generated, **check the Frequency\_delta\_background.pdf file** in the QC folder to identify data that could possibly be contaminated. The default setting will remove any strains where the delta background is 3SD from the mean, but **if there is no or almost no contamination, you may need to set this threshold higher to 5 or above**. Find the following lines in the lm\_interaction\_drug\_all\_V5\_do\_not\_print\_avg\_Z.R and change 3\*sd to 5\*sd or a higher value. Use **Save As** (not just save) to rename the lm\_interaction\_drug\_all\_V5\_do\_not\_print\_avg\_Z.R to something such as lm\_interaction\_drug\_all\_V5\_do\_not\_print\_avg\_Z\_deltabackground5SD.R. + + ![][image9] + + **1.1 REMc** + + **\*\*\* WHAT FOLLOWS IS OUT OF DATE- THE REMc PROCESS IS NOW INCORPORATED IN A JAR FILE AND IN A SHELL SCRIPT THAT ALSO CALLS Rscripts TO ARRANGE DATA. SEE GOOGLE DOC “Copy of Multi-experiment Study Analysis System Guide\_jh24\_0718”** + + At this stage, we want to compare multiple experiments to each other to identify phenomic modules of interest. Recursive Expectation Maximization clustering (REMc) can be used to cluster interaction scores and then perform biological enrichment analysis. + \-See doi: [10.1063/1.3455188](https://doi.org/10.1063/1.3455188) for the Chaos paper and Jingyu Guo’s readme.V2 to set up Eclipse. + +1) **Merge the files**. This can be done using **R or Excel**; **if you want to use R see appendix 4.1 – you will need to use R if the total number of genes is not the same between experiments\!** Using Excel, open the first **ZScores\_Interaction.csv** file of interest. \*\*\***Sort by OrfRep\*\*\***. **Copy** the **OrfRep, Gene, Z\_Shift\_L, Z\_lm\_L, Z\_Shift\_K, Z\_lm\_K** columns into a new excel sheet.
Rename the **Z\_Shift\_L, Z\_lm\_L, Z\_Shift\_K, Z\_lm\_K** to have descriptive names preceding the current column names for example **Gem\_Z\_Shift\_L, Gem\_Z\_lm\_L, Gem\_Z\_Shift\_K, Gem\_Z\_lm\_K**. Open the next **Zscores\_Interaction.csv** file of interest, \*\*\***Sort by OrfRep (don’t forget to do this or the ORFs will not match\!)\*\*\***, and repeat the same steps to add additional experiment columns to the new Excel sheet. Give different descriptive names. For example: **Cyt\_Z\_Shift\_L, Cyt\_Z\_lm\_L, Cyt\_Z\_Shift\_K, Cyt\_Z\_lm\_K**. Repeat this process for as many experiments you want to compare in REMc. I generally reorganize the excel sheet at this step so that the order of columns is how I want to output the heatmaps later on (i.e. All K values to the left, all L values to the right, alternate Z\_Shift and the Z\_lm). I also generally save this file as “Date\_Description\_ShiftInt\_WithNAs.csv” (can also save as excel (.xlsx) format – doesn’t really matter yet). Below is an example of how this file looks (using gemcitabine (Gem) and cytarabine (Cyt) as an example. + + ![][image10] + + + + + + + +1) **Substitute the NAs**. For REMc to work correctly and to allow proper heatmap display we will substitute the NA values with small, non-zero values. Open the “Date\_Description\_ShiftInt\_WithNAs.csv” file in excel and **select only the “Shift columns”**. Perform a **find and replace** using find “NA” and replace with “0.001”. + + ![][image11] + + Next, replace the NAs in the Z\_lm columns using **find and replace** to find “NA” and replace with “0.0001”. Note the extra significant digit here – the heatmap script cannot draw the dendrogram if all the values for a gene across a row are NAs and will error out. 
Thus, we want to be able to identify NAs in one set of columns, but also be able to draw the dendrogram by giving some small value that will cluster away from the rest of the data, and so the script only generates “NAs” in rows with the 0.001 value (shift) but will print the Z\_lm value as a white color to indicate NAs in this way. + + ![][image12] + + After substitution, the example looks like the following: + +![][image13] + +I generally name this file “Date\_Description\_ShiftInt\_NArem.csv” + +1) **Fill empty cells in the gene column by copying over ORF name. *Some ORFs do not have assigned gene names.* **Fill in the ‘missing’ genes** (if there are any – this will depend on the \!\!results.txt file. If there are blanks in the genes column, this can cause issues with REMc and heatmap generation. If it is a genome wide experiment (making sure it is sorted by OrfRep), I will often copy and paste over the Gene column from a previous genome wide experiment with the gene names filled in. However, I also have an R script that can fill in the missing names and updates gene names (and if no name is given it replaces it with the OrfRep). The Rscript is named **14\_0430\_cmd\_MatchGenes.R**. See the following for usage: + + Rscript 14\_0430\_cmd\_MatchGenes.R input\_file.csv SGD\_features.tab output\_file + + Arg 1\) Input\_file.csv + \-This should be your “Date\_Description\_ShiftInt\_NArem.csv” file generated in step 2\. Make sure that OrfRep or ORF is in column 1 and Gene is in column 2\. + Arg 2\) SGD\_features.tab – download this from [https://downloads.yeastgenome.org/curation/chromosomal\_feature/](https://downloads.yeastgenome.org/curation/chromosomal\_feature/) + Arg 3\) output\_file + \-The name you want to give the file. 
I generally call this file “Date\_Description\_ShiftInt.csv” + +![][image14] + +We will also need to remove all unusual characters in the gene column after adding the gene names (as above, either by copy/paste or by 14\_0430\_cmd\_MatchGenes.R). Open the newly created file (“Date\_Description\_ShiftInt.csv”) in excel and use the find function to **look for the following characters** – comma (**,**) or apostrophe (**‘**) and **replace the gene name with the OrfRep** in all instances. Also search for **YKL134C** – in the corresponding gene column, excel will auto change its gene name (OCT1) to a date (October 1). Change the gene name to YKL134C to prevent this conversion at any step. + +1) **Select only the L and K z-scores above |2| for input into REMc** (if you want to cluster all the data you can skip this step, but in general selecting only the values above 2SD tends to clean up the clustering). I generally do this as an “ad-hoc” analysis in R and will give example code below. Open RStudio and create a new R Script. You can copy and paste the following into R and will only need to modify bolded parts (see below): + + X \<- read.csv(file="**filePath/Date\_Description\_ShiftInt.csv**",stringsAsFactors \= FALSE) + + X \<- X\[abs(X$**DescName1**\_Z\_lm\_K) \>= 2 | abs(X$**DescName1**\_Z\_lm\_L) \>= 2 | abs(X$**DescName2**\_Z\_lm\_K) \>= 2 | abs(X$**DescName2**\_Z\_lm\_L) \>= 2,\] + + write.csv(X,file \= "**filepath/Date\_Description\_ShiftInt\_Above2SD.csv**",row.names \= FALSE) + ![][image15] + + \-change “**filePath/Date\_Description\_ShiftInt.csv**” to the file path to your file and then the name of your file. Use tab within the quotation marks in RStudio to help locate files. + \-Change “**DescName1**” and “**DescName2**” to whatever descriptive name you gave your headers in Date\_Description\_ShiftInt.csv (i.e. could be “Gem” and “Cyt”) + \-replace “**filepath/Date\_Description\_ShiftInt\_Above2SD.csv**” with a path to the file you want to create.
+ \-Save and run the above R script with a descriptive name such as “Date\_Get2SD\_for\_ExperimentDescription.R” in case you need to go back and see what you did. + +1) **Generate files for REMc** and for later adding the shift values back into the heatmaps (we don’t cluster the shift values but are interested in using them at the heatmap stage to identify genes where deletion results in a large initial shift indicating “sick” strains). \-Open the “Date\_Description\_ShiftInt\_Above2SD.csv” created in step 4 (or “Date\_Description\_ShiftInt.csv” generated in step 3 if step 4 was skipped and you want to use all genes regardless of z-score for clustering) in Excel. + i) **Remove** all of the **columns with shift values** and save the file as “**Date\_Description\_REMcReady.csv**”. This file is used as input for REMc. + ii) Reopen “Date\_Description\_ShiftInt\_Above2SD.csv” and **remove** all the **columns with Z\_lm scores**. Save this file as “Date\_Description\_Shift.csv”. This file will later be used to add the shift values back when generating heatmaps associated with REMc generated clusters. + +1) **Perform REMc**. Copy the “**Date\_Description\_REMcReady.csv**” file generated in step 5 to your eclipse-workspace/REMc/ directory. **Open eclipse** by double clicking on the eclipse icon in the eclipse folder in your home folder. Once in eclipse, **select “Run” and then** “**Run configurations**” from the top menu. + +![][image16] +Next, select the Arguments tab and look to see that you have the following: + +Program arguments: + +Date\_Description\_REMcReady.csv + +GeneByGOAttributeMatrix\_nofiltering-2009Dec07.tab + +ORFs\_w\_DAmP\_list.txt + +1 + +true + +VM arguments: + +\-Xms8000m \-Xmx8000m + +![][image17] + +Once set, you will only need to alter the first line in program arguments to the name of your **Date\_Description\_REMcReady.csv** file. Select **apply** and then **run** in the bottom right corner to perform REMc on the selected file. 
Look to see if any errors are generated after hitting run (common errors here are due to unexpected characters in the ORF or Gene columns, or non-numeric values in the other columns (such as “NA”). If REMc is running, in the eclipse-workspace/REMc/ directory you should see a file called **Date\_Description\_REMcReady.csv-WholeTree.csv**. This file will be updated every few minutes as new clusters are generated. After completion of REMc, you should have **Date\_Description\_REMcReady.csv-WholeTree.csv**, **Date\_Description\_REMcReady.csv-finalTable.csv**, **Date\_Description\_REMcReady.csv-summary.csv**, **Date\_Description\_REMcReady.csv.arff**. Copy these files to a new directory with a descriptive name to perform REMc (such as **Date\_Description\_Clustering/**). + +**1.1.1 GTF (Gene Ontology Term Finder)** + +GTF will look for enriched gene ontology terms in the REMc clusters. Several files are required to run GTF and they must be copied to the working folder (I usually create a new folder to contain all of the REMc/GTF files) to run GTF. + +1\) **Make a new folder, e.g., ‘Clustering’, in the project folder and** **Copy REMc and GTF files to the new folder**. (Most of Sean’s Clustering results are on Data2/Santos\_Sean/Documents/Hartman\_Lab/ACS\_project/). Once REMc is finished, which is described in the previous section, copy the following files to your working folder **Date\_ResultsDescription\_REMcReady.csv-finalTable.csv**, **Date\_ResultsDescription\_REMcReady.csv-WholeTree.csv, Date\_ResultsDescription\_REMcReady.csv-summary.csv**. 
Next, copy to your working folder the files located in the following directory on the server: **/media/data/Santos\_Sean/GTF\_Files/** + +![][image18] + +If the copy/paste function doesn’t work for you on the server from Xquartz (we’ve had issues with this on Mac OS), then use FileZilla to copy the files to your computer (files in **/media/data/Santos\_Sean/GTF\_Files/**) and then you can copy the files from wherever you save them on your local computer to your working folder on the server. It’s good practice to make a backup of the files off of the server, but one can skip this step by copying them to the server desktop (which exists on a different drive) and then copy back again to the desired folder (dragging between folders on the same drive moves rather than copies the files). + +In your working directory, create folders named “Process”, “Function”, and “Component” and we will perform GTF for each of these ontologies in these folders. Copy the GTF files into these folders. (The GTF files can be removed from these folders after GTF is complete to save space). Copy **DconJG.py and AddShiftVals.R** into the parent folder as well (needs to be in same directory as the **.csv-finalTable.csv** file. In the example of P53\_NoDamps (involving HLD, HLEG, WT and 2KR), there were 5 different comparisons, with component, function and process GTF for each. Also note, ‘Pairwise\_Comparisons’ and ‘Multiple\_Comparisons’ folders are GTA only. + +2\) **Run the DconJG.py script**. GTF requires files to have a specific format and to generate text files for each cluster in this format we can apply the DconJG.py script. 
To run this script, open the command line in your working folder and apply the script in the following format: + +**python DconJG.py Date\_Results\_REMcReady.csv-finalTable.csv cluster\_origin\_column\_num output\_path\_name** + +\-the cluster\_origin\_column\_num tells the script where the header/column containing “cluster\_origin” is located in the \-finalTable.csv file. (i.e. – is it in the 8th column, 10th column, etc.?) + +![][image19] + +\-I usually put a “./” as the argument for “output\_path\_name”, which makes a folder in my working directory with the same name as my input \-finalTable.csv file. Copy the resulting folder containing the .txt files for each cluster into the “Process”, “Function” and “Component” directories. + +3\) **Perform GTF**. In the process folder in your working directory, run the “**Process\_example\_v4.sh**” script: + +**./Process\_example\_v4.sh ORFs\_w\_DAmP\_list.txt Date\_Results\_REMcReady/\*.txt** + +\-If necessary, substitute “ORFs\_w\_DAmP\_list.txt” with the proper background file containing a list of ORFs to include as the background for GTF. For example, if DAmPs were excluded use the 17\_0503\_ORF\_list\_without\_DAmPs.txt file. + +![][image20] + +In the Function and Component folders, run the “**Function\_example\_v2.sh**” and “**Component\_example\_v2.sh**” scripts in their respective folders using the same arguments as the Process\_example\_v4.sh. \*\*\* if you do not run these scripts in separate folders, you will overwrite the results from one ontology with another\*\*\* + +In your **Date\_DescriptionOfResults\_REMcReady/** folder, new .terms.txt and .tsv text files should be generated, and GTF will run through all .txt files for clusters (can take 10 mins to an hour depending on how many total clusters). The example below shows the files created after running GTF.
+ +![][image21] + +4\) **Concatenate the GTF results.** Use the Concatenate\_GTF\_results.py file to concatenate the GTF results for each ontology by running this script in the folder for each respective ontology. The script can be used in the following way: + +python Concatenate\_GTF\_results.py Date\_DescriptionOfResults\_REMcReady/ Date\_GTF\_Results\_**Ontology**\_DescriptionOfResults.txt + +![][image22] + +\-Make sure to change “Ontology” in the above line to either “Process”, “Function”, or “Component”. + +Lastly, I prefer to compile the concatenated results sheets into one excel document. To do this, I will copy the files to my computer using FileZilla and then copy and paste the tables to separate worksheets in the same Excel document and save this file as Date\_GTF\_Results\_DescriptionOfResults.xlsx + +**1.1.2 Heatmap generation for REMc**. Automatic generation of heatmaps from REMc clusters can be performed using the Rscript “**18\_0205\_heatmaps\_zscores\_2SD\_color\_NARem\_Z\_lm.R**”. This file can be found in **/media/data/Santos\_Sean/Rscripts/REMc\_Heatmaps/** + +Before we make heatmaps, we prefer to add the shift values back with the interaction scores in Date\_Description\_REMcReady.csv-finalTable.csv. + +1\) **Add back the shift values**. Use “**AddShiftVals.R**” to add the shift values back to the Date\_Description\_REMcReady.csv-finalTable.csv. Use the script in the following format: + +Rscript AddShiftVals.R Date\_Description\_REMcReady.csv-finalTable.csv Date\_Description\_Shift.csv Date\_Description\_REMcReady.csv-finalTable**WithShift**.csv + +Note: ‘Date\_Description\_Shift.csv’ was saved when the shift values were removed just prior to REMc (see section 1.1, step 5). + +This will generate a file with both the interaction scores and the shift values in the finalTable.csv format.
However, we will need to **reorganize** the Date\_Description\_REMcReady.csv-finalTable**WithShift**.csv so that each shift value is correctly ordered to the left of its associated Z\_lm interaction score. Open the Date\_Description\_REMcReady.csv-finalTable**WithShift**.csv file in excel and manually reorder the shift values to their correct location. + +2\) **Run the heatmap script**. Use **18\_0205\_heatmaps\_zscores\_2SD\_color\_NARem\_Z\_lm.R** to generate the heatmaps. Use this script as described below: + +Rscript 18\_0205\_heatmaps\_zscores\_2SD\_color\_NARem\_Z\_lm.R Date\_Description\_REMcReady.csv-finalTable**WithShift**.csv Heatmaps/ + +\-this will create a folder called “Heatmaps” in your directory with PDFs for all REMc clusters. + +3\) **Concatenate the heatmaps into one PDF**. Use the PDF toolkit (pdftk) to concatenate the heatmaps by running the following line in the command line in your working folder. + +pdftk Heatmaps/\*.pdf output Date\_Description\_Heatmaps.pdf + +**1.2** **GTA Analysis**. Gene Ontology (GO) term averaging (GTA) is performed first on a single experiment (1.2.1) and later can be compared pairwise to other GTA experiments (1.2.2) by generating interactive plots. GTA assigns GO terms an average Z score based on the interaction z-scores (L- or K-based) for all genes in that GO term, and then assigns significance to GO terms with a Z-score above |2| after subtracting the standard deviation. In general, we have observed that GTA tends to identify smaller GO terms that may not have been identified by REMc/GTF. + +**1.2.1 Generate GTA results.** + +Open a terminal in, or navigate in the terminal to, /media/data/Santos\_Sean/Q\_HTCP\_Analysis/. In this folder we will use the ScoreAllGOTerms\_From\_Z\_lm\_V2.R script to generate GTA scores.
Run this script on your experiment using the following arguments/files: + +Rscript ScoreAllGOTerms\_From\_Z\_lm\_V2.R Exp\_Name/Analysis1/ZScores\_Interaction.csv go\_terms.tab gene\_association.sgd Exp\_Name/GTA\_Results/ + +\-Several files will be generated in the specified directory, but the file with all of the GTA results is named “**Average\_GOTerms\_All.csv**” and will be used in the next step of pairwise analysis of experiments. + +![][image23] + +**1.2.2 Pairwise GTA analysis.** Comparing GTA between two experiments will create the interactive plots and associated tables for GTA identified GO terms. This analysis can be performed in the /media/data/Santos\_Sean/Q\_HTCP\_Analysis/ directory using the following scripts: **Compare\_GTF\_Averages\_BetweenScreens\_lm\_v2.R** for L values and **Compare\_GTF\_Averages\_BetweenScreens\_lm\_Kvals\_v2.R** for K values. + +The two scripts take the same arguments (you only need to specify which script is being used for L or K after writing Rscript in the command line) and can be run using the following: + +Rscript Compare\_GTF\_Averages\_BetweenScreens\_lm\_v2.R Exp1/GTA\_Results/Average\_GOTerms\_All.csv Exp1\_Name Exp2/GTA\_Results/Average\_GOTerms\_All.csv Exp2\_Name Pairwise\_Comparisons/Exp1\_vs\_Exp2/GTA\_L/ + +\-In the Pairwise\_Comparisons directory, you will need to create the folder for your comparison first because the Rscript will only allow you to make one new directory at a time. + +**1.3 Term Specific Heatmaps** + +Open a terminal in, or navigate in the terminal to, /media/data/Santos\_Sean/Q\_HTCP\_Analysis/; the term-specific heatmap Rscripts are in this directory.
+ +\-Depending on the number of experiments you want to compare, select one of the following scripts: **GO\_list\_All\_ChildTerms\_lmZscore\_max100child\_Heatmaps\_V2.R** can be used to compare two experiments, **GO\_list\_All\_ChildTerms\_lmZscore\_max100child\_Heatmaps\_3terms\_V2.R** for 3 experiments, **GO\_list\_All\_ChildTerms\_lmZscore\_max100child\_Heatmaps\_4terms\_V2.R** for 4, **GO\_list\_All\_ChildTerms\_lmZscore\_max100child\_Heatmaps\_5terms\_V2.R** for 5\. + +![][image24] + +Follow the directions for the Rscript appropriate for the number of experiments you are comparing by opening the Rscript in either Rstudio or gedit and looking at what arguments to use. The number of arguments will be greater if a greater number of experiments are being compared. For the GO\_list\_All\_ChildTerms\_lmZscore\_max100child\_Heatmaps\_V2.R Rscript it can be used in the command line using the following: + +Rscript GO\_list\_All\_ChildTerms\_lmZscore\_max100child\_Heatmaps\_V2.R Exp1\_ZScores\_Interaction.csv Exp1\_name Exp2\_ZScores\_Interaction.csv Exp2\_Name gene\_ontology\_edit.obo go\_terms.tab All\_SGD\_GOTerms\_for\_QHTCPtk.csv Pairwise\_Comparisons/Exp1\_vs\_Exp2/TermSpecificHeatmaps/ + +\-The All\_SGD\_GOTerms\_for\_QHTCPtk.csv file can be substituted for another file with a list of GO\_IDs if you only want to run it on certain GO\_terms – I’ve created a few examples in the following path: /media/data/Santos\_Sean/Q\_HTCP\_Analysis/Pairwise\_Comparisons/Query\_Term\_Lists/ + +\-For greater than 2 experiments, there will be more arguments to load in the Exp\#\_ZScores\_Interaction.csv and the Exp\#\_Name. + +**3\. Other Rscripts** + +**3.1 Pairwise Venn Diagrams and CPP Correlation plots to compare two experiments.** I generally perform this analysis right after performing GTA and creating a folder for the two experiments I am comparing in the **Pairwise\_Comparisons** folder. 
Use the Rscript at the following file path: /media/data/Santos\_Sean/Q\_HTCP\_Analysis/**Compare\_Pairwise\_Overlap\_VennDiagrams\_confined\_to\_matches\_V2.R** + +Usage (shown as run from /media/data/Santos\_Sean/Q\_HTCP\_Analysis/): + +Rscript Compare\_Pairwise\_Overlap\_VennDiagrams\_confined\_to\_matches\_V2.R Exp1/Exp1\_Analysis1/Exp1\_InteractionScores.csv Exp1\_Name Exp2/Exp2\_Analysis1/Exp2\_InteractionScores.csv Exp2\_Name Pairwise\_Comparisons/Exp1\_vs\_Exp2/VennDiagrams\_and\_Correlation/ + +This script will output Venn Diagrams for overlap between the two experiments when comparing enhancers and suppressors (defined as Z \> |2|). It will generate lists for the intersecting genes, and genes that are experiment-specific deletion enhancers or suppressors as .csv tables. It will also perform a CPP comparison by plotting the L, K, R, and AUC values across experiments and print a correlation coefficient (R2) for a linear regression fit. It will also create graph plotting the ranked CPP scores against each other. There will be four output folders but using the updated model we only want to look at the “**lm\_both**” results. **Ignore** the “Avg\_Zscore\_Both (alex’s method)” and the “lm\_exp1\_only” and “lm\_exp2\_only” (these compare alex’s method and the updated linear model method). I can remove these extra comparisons if too confusing. + +**3.2 CPP analysis for YKO/YKD/RF for one experiment (all CPPs by all CPPs).** Use the Rscript at the following file path: /media/data/Santos\_Sean/Q\_HTCP\_Analysis/**CPP\_Comaprison\_with\_DAmPs\_and\_RF.R** + +Usage (shown as run from /media/data/Santos\_Sean/Q\_HTCP\_Analysis/): + +Rscript CPP\_Comaprison\_with\_DAmPs\_and\_RF.R ExpName/ExpName\_Analysis1/ExpName\_InteractionScores.csv ExpName/ExpName\_Analysis1/ExpName\_**RF\_InteractionScores**.csv 16\_0531\_DAmPs\_Only.csv ExpName/ExpName\_Analysis1/CPP\_Compare/ + +\-Three PDFs will be created after this analysis: 1\) CPPs only for the YKO. 
2\) CPPs from YKO and YKD as different colors. 3\) YKO, YKD and RF as different colors. + +\-After generating these files, I check to make sure that the RF interaction scores generally fall into the range of \+2 to \-2. I also look to see if the DAmPs have a different distribution than the YKO. + +**3.3 Heatmaps with homologs.** Use the Rscript at the following file path: /media/data/Santos\_Sean/GTF\_files/**20\_0328\_heatmaps\_Z\_lm\_wDAmPs\_andHomology.R** + +Uses the following arguments: + +1) EXP\_REMcReady.csv-finalTableWithShift.csv +1) Output folder +1) 17\_0503\_DAmPs\_Only.txt (see path below) +1) Yeast\_Human\_Homology\_Mapping\_biomaRt\_18\_0920.csv (see path below) + +Usage: Rscript 20\_0328\_heatmaps\_Z\_lm\_wDAmPs\_andHomology.R EXP\_REMcReady.csv-finalTableWithShift.csv Heatmaps\_Homologs/ /media/data/Santos\_Sean/GTF\_files/17\_0503\_DAmPs\_Only.txt /media/data/Santos\_Sean/GTF\_files/Yeast\_Human\_Homology\_Mapping\_biomaRt\_18\_0920.csv  + +\-The script will generate a folder with heatmaps including homology info for the genes in the finalTable file and two .csv files, one with all the yeast genes from the original finalTable (even without homologs) and one list only with the homologs. + +**4.0 Appendix.** + +**4.1** **Using R to merge files** (see REMc 1.1 step 1). R can be used to merge two tables using the join function from the plyr package. If you have not installed this package (only need to do this once), open R studio and type install.packages(“plyr”) into the console and follow the instructions to install the package. 
In R studio modify the following lines by changing the highlighted portion to the path to your files + +\#open required library for the join function + +library(plyr) + +\#read in the files for your experiment + +X1 \<- read.csv(file="FilePath/Exp1\_ZScores\_Interaction.csv",stringsAsFactors \= FALSE) + +X2 \<- read.csv(file=" FilePath/Exp2\_ZScores\_Interaction.csv",stringsAsFactors \= FALSE) + +\#join the two files, **list the larger file first** – in this example X2 has the larger number of genes. + +\#if X1 has a larger number of genes, switch the order of X1 and X2 + +X \<- join(X2,X1,by="OrfRep") + +\#write new file + +write.csv(X,file \= " FilePath/DescriptiveName\_withNAs.csv",row.names=F) + +\-If you need to join more than two tables together you will have to write more than one join. You will first need to join two of the tables, create an object, and then join that object with the next file, and so on… + +See below for an example of multiple files + +\#open required library for the join function + +library(plyr) + +\#read in the files for your experiment + +X1 \<- read.csv(file="FilePath/Exp1\_ZScores\_Interaction.csv",stringsAsFactors \= FALSE) + +X2 \<- read.csv(file=" FilePath/Exp2\_ZScores\_Interaction.csv",stringsAsFactors \= FALSE) + +X3 \<- read.csv(file=" FilePath/Exp3\_ZScores\_Interaction.csv",stringsAsFactors \= FALSE) + +X4 \<- read.csv(file=" FilePath/Exp4\_ZScores\_Interaction.csv",stringsAsFactors \= FALSE) + +\#join the two files, list the larger file first – in this example X2 has the largest number of genes. 
+ +\#if X1 has a larger number of genes, switch the order of X1 and X2 + +X \<- join(X2,X1,by="OrfRep") + +X \<- join(X,X3,by="OrfRep") + +X \<- join(X,X4,by="OrfRep") + +\#write new file + +write.csv(X,file \= "FilePath/DescriptiveName\_withNAs.csv",row.names=F) + +**5.2 Removing DAmPs** + +Use the R script at the following path: /media/data/Santos\_Sean/Q\_HTCP\_Analysis/Exclude\_DAmPs.R + +Rscript Exclude\_DAmPs.R Input\_File.csv 17\_0503\_DAmPs\_Only.txt output\_file.csv + +Arg 1 – Use any file with an OrfRep column that you want to remove the DAmPs from, for example: Zscores\_Interaction.csv or REMcReady.csv, \-finalTable.csv files + +Arg 2 \- /media/data/Santos\_Sean/Q\_HTCP\_Analysis/**17\_0503\_DAmPs\_Only.txt** + +Arg 3 – output file name; make sure to have the .csv extension. + +**\-Alternatively, create a new script by copying and pasting the following lines into R, and modify the highlighted sections to match the file you want to remove the DAmPs from** + +X \<- read.csv(file="path/FileToRemoveDAmPsFrom.csv",stringsAsFactors \= FALSE) + +Damps \<- read.delim("filepath/**17\_0503\_DAmPs\_Only.txt**",header=F) + +\#create a column in X called ORF so we can remove OrfRep numbers and find all the DAmPs + +X$ORF \<- X$OrfRep + +\#remove \_1-4 from newly created ORF column + +X$ORF \<- gsub("\_1","",x=X$ORF) + +X$ORF \<- gsub("\_2","",x=X$ORF) + +X$ORF \<- gsub("\_3","",x=X$ORF) + +X$ORF \<- gsub("\_4","",x=X$ORF) + +X \<- X\[\!(X$ORF %in% Damps$V1),\] + +write.csv(X,file \= "path/output\_file\_noDAmPs.csv",row.names \= FALSE) + +\*the **17\_0503\_DAmPs\_Only.txt** file is in the following path on the server: + +/media/data/Santos\_Sean/GTF\_Analysis/**17\_0503\_DAmPs\_Only.txt** + +\-The above script could also be used to remove any set of genes from another, but you would substitute the 17\_0503\_DAmPs\_Only.txt with a set of OrfReps saved as a tab delimited file with one ORF per line and no header. 
+ +**5.3 Adjust ZScores for YKO only and remove DAmPs** + +\-We discussed a script that would remove the DAmPs and also adjust the ZScores for CPPs to only consider the YKO strains in the z-score calculation. + +Use the file at the following path: /media/data/Santos\_Sean/Q\_HTCP\_Analysis/**Adjust\_YKO\_Zscores\_RemoveDAmPs.R** + +Usage: + +Rscript Adjust\_YKO\_Zscores\_RemoveDAmPs.R ZScores\_Interaction.csv 17\_0503\_DAmPs\_Only.txt AdjustedZScores\_noDAmPs/ + +Arg 1 – Use the Zscores\_Interaction.csv file where you want to adjust the YKO ZScores and remove the damps + +Arg 2 \- /media/data/Santos\_Sean/Q\_HTCP\_Analysis/**17\_0503\_DAmPs\_Only.txt** + +Arg3 – a file path to put the files that will be created into (will create a new ZScores\_Interaction.csv with the adjusted scores in the Z\_lm\_ columns, a scatterplot of the initial Z scores vs adjusted, and new rank plots for the adjusted scores). + +**5.4 Files that are updated from databases.** + +gene\_ontology\_edit.obo \- [www.geneontology.org/ontology/gene\_ontology\_edit.obo](http://www.geneontology.org/ontology/gene\_ontology\_edit.obo) + +SGD\_features.tab \- [https://downloads.yeastgenome.org/curation/chromosomal\_feature/](https://downloads.yeastgenome.org/curation/chromosomal\_feature/) + +go\_terms.tab \- [https://downloads.yeastgenome.org/curation/literature/](https://downloads.yeastgenome.org/curation/literature/) + +\*\*\*\*\*\*\*\*\*\*Adding information about how JH updated files in summer of ‘23: + +Updating Q-HTCP Source Files: + +**gene\_ontology\_edit.obo** + +Direct link to the latest file: 
+[https://purl.obolibrary.org/obo/go.obo](https://nam12.safelinks.protection.outlook.com/?url=https%3A%2F%2Fpurl.obolibrary.org%2Fobo%2Fgo.obo\&data=05%7C01%7Cjhartman%40uab.edu%7C0cf8e0ce5ae74943d3bd08dbc1388a83%7Cd8999fe476af40b3b4351d8977abc08c%7C1%7C0%7C638316220828714336%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C\&sdata=4LRsWT5N44VOOrAX9aby7qWa5ece0NDh6BlhW0r%2FAdA%3D\&reserved=0) + +^^copy this into a new text file and give the same name. + +More info about the file, where it comes from, and the Gene Ontology consortium: +[https://geneontology.org/docs/download-ontology/](https://nam12.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgeneontology.org%2Fdocs%2Fdownload-ontology%2F\&data=05%7C01%7Cjhartman%40uab.edu%7C0cf8e0ce5ae74943d3bd08dbc1388a83%7Cd8999fe476af40b3b4351d8977abc08c%7C1%7C0%7C638316220828714336%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C\&sdata=s%2FDjYbaA%2FkK%2FwIKsfGhKnS%2BconR11mkBrnxnlX9QMrw%3D\&reserved=0) + +You can also use SGD's GO Term Finder, which always has the latest ontology and annotations: +[https://www.yeastgenome.org/goTermFinder](https://nam12.safelinks.protection.outlook.com/?url=https%3A%2F%2Fwww.yeastgenome.org%2FgoTermFinder\&data=05%7C01%7Cjhartman%40uab.edu%7C0cf8e0ce5ae74943d3bd08dbc1388a83%7Cd8999fe476af40b3b4351d8977abc08c%7C1%7C0%7C638316220828714336%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C\&sdata=EiFqHvWQI49caPBayi44CbfVfXcqqXjTeN9TMY8%2Faz8%3D\&reserved=0) + + +**SGD\_features.tab** + +Best to use YeastMine – see notes and files from work/yeast strains/ **’23\_0914\_NewMPFile\_Construction.xlsx’** +Need to use the new, updated KO ORF list (made compatible with the final SGD edition of the genome) to get the new gene names, and just replace them in an existing file to update. 
+ + +**Go\_terms.tab** + +The go\_terms.tab file can be generated using YeastMine with a single click. + +From the ‘Retrieve GO Terms’ template ([https://yeastmine.yeastgenome.org/yeastmine/template.do?name=GO\_Terms\_Tab\&scope=all](https://nam12.safelinks.protection.outlook.com/?url=https%3A%2F%2Fyeastmine.yeastgenome.org%2Fyeastmine%2Ftemplate.do%3Fname%3DGO\_Terms\_Tab%26scope%3Dall\&data=05%7C01%7Cjhartman%40uab.edu%7C2eec1f6ea1c14c0971a408dbc13bc3c9%7Cd8999fe476af40b3b4351d8977abc08c%7C1%7C0%7C638316235117030364%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C\&sdata=1KnL5BeJIw3LHr%2F4vy%2FoM1sh%2FBMV0tXQBYtHpaIqHCQ%3D\&reserved=0)), click the green ‘Show Results’ button, voila. You can download the file via the ‘Export’ button on the results page. + +The updated Go\_terms.tab file was problematic \- it was different from the original in the style of (col 1\) GO\_ID column entries (e.g., ‘GO:0000001’ instead of ‘1’), and (col 3\) ‘biological\_process’ instead of ‘P’. It also had 1,122 fewer rows/entries (42,887 instead of 44,009\). + +The above columns can be fixed by using “text to columns” with ‘:’ as the delimiter, and then doing find/replace for P, F, and C. +File updated 2024\_0125 (42,442 rows). Original had 44,009 and last update 42,887… so numbers are getting smaller. 
One can find updates at: Gene Ontology FAQ: [https://geneontology.org/docs/faq/\#ontology](https://nam12.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgeneontology.org%2Fdocs%2Ffaq%2F%23ontology\&data=05%7C02%7Cjhartman%40uab.edu%7C4f087305933b4fa99c0a08dc1df646e3%7Cd8999fe476af40b3b4351d8977abc08c%7C1%7C0%7C638418190840735342%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C\&sdata=bkb%2FAv350Z12qddJZeFLDFgxHYxM%2FWHazBOVJ%2BPeO4Q%3D\&reserved=0) + +**gene\_association.sgd** + +The gene\_association.sgd file (‘gaf’) is still served from the Downloads site and can also be accessed even more easily via SGD search. + +Downloads site: +[http://sgd-archive.yeastgenome.org/curation/literature/](https://nam12.safelinks.protection.outlook.com/?url=http%3A%2F%2Fsgd-archive.yeastgenome.org%2Fcuration%2Fliterature%2F\&data=05%7C01%7Cjhartman%40uab.edu%7C4985afdc22f24a1ff21d08dbc1422ebe%7Cd8999fe476af40b3b4351d8977abc08c%7C1%7C0%7C638316262252331525%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C\&sdata=Fc4mkGqT2a3UL8gNeObm7huuQR%2FzH1KTWd%2FZjpStlz8%3D\&reserved=0) + +From SGD Search (search for ‘gaf’, then click category ‘Downloads’): +[https://www.yeastgenome.org/search?q=gaf\&category=download\&status=Active](https://nam12.safelinks.protection.outlook.com/?url=https%3A%2F%2Fwww.yeastgenome.org%2Fsearch%3Fq%3Dgaf%26category%3Ddownload%26status%3DActive\&data=05%7C01%7Cjhartman%40uab.edu%7C4985afdc22f24a1ff21d08dbc1422ebe%7Cd8999fe476af40b3b4351d8977abc08c%7C1%7C0%7C638316262252331525%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C\&sdata=JTDNv83D31LvU25UynBuVor7V6X4z28hpU6Sml3HIbc%3D\&reserved=0) + +\*\*\*\*\*\*\*\*\*\*\*\* + +[image1]: + +[image2]: + +[image3]: + +[image4]: + +[image5]: + +[image6]: + +[image7]: + +[image8]: + +[image9]: + +[image10]: + +[image11]: + +[image12]: + +[image13]: + 
+[image14]: + +[image15]: + +[image16]: + +[image17]: + +[image18]: + +[image19]: + +[image20]: + +[image21]: + +[image22]: + +[image23]: + +[image24]: \ No newline at end of file diff --git a/workflow/docs/20_0329_SS_Q_HTCP_Analysis_Readme.odt b/workflow/docs/20_0329_SS_Q_HTCP_Analysis_Readme.odt new file mode 100644 index 00000000..426614f4 Binary files /dev/null and b/workflow/docs/20_0329_SS_Q_HTCP_Analysis_Readme.odt differ diff --git a/workflow/docs/EASY Manual 17_0614.odt b/workflow/docs/EASY Manual 17_0614.odt new file mode 100644 index 00000000..36fad5c1 Binary files /dev/null and b/workflow/docs/EASY Manual 17_0614.odt differ diff --git a/workflow/docs/QHTCP - Hartman Lab User's Guide.md b/workflow/docs/QHTCP - Hartman Lab User's Guide.md new file mode 100644 index 00000000..e0736d4a --- /dev/null +++ b/workflow/docs/QHTCP - Hartman Lab User's Guide.md @@ -0,0 +1,821 @@ +**QHTCP \- Hartman Lab User’s Guide** + +**Overview and Introduction to Directory Structure** + +There should be at least 4 subdirectories to organize Q-HTCP data and analysis. The parent directory is simply called ‘Q-HTCP’ and the 4 are subdirectories described below (**Fig. 1**): + +1. ‘ExpJobs’- This directory contains raw image data and image analysis results for the entire collection of Q-HTCP experiments. We recommend each subdirectory within ‘ExpJobs” should represent a single Q-HTCP experiment and be named using the following convention (AB yyyy\_mmdd\_PerturbatationsOfInterest): experimenter initials (‘AB ‘), date (‘yyyy\_mmdd\_’), and brief description (‘drugs\_medias’). Each subdirectory contains the Raw Image Folders for that experiment (a series of N folders with successive integer labels 1 to N, each folder containing the time series of images for a single cell array). 
It also contains a user-supplied subfolder, which must be named ‘MasterPlateFiles’ and must contain two Excel files, one named ‘**DrugMedia\_**_experimentdescription_’ and the other named ‘**MasterPlate\_**_experimentdescription_’. The bolded part of the file name including the underscore is required. The italicized part is an optional description. Generally the ‘DrugMedia\_’ file merits description. If the standard MasterPlate\_Template file is being used, there is no need to customize the name. On the other hand if the template is modified, it is recommended to rename it and describe accordingly \- a useful convention is to use the same name for the MP files as given to the experiment (i.e., the parent ExpJobs subdirectory described above) after the underscores. The ‘MasterPlate\_’ file contains associated cell array information (culture IDs for all of the cell arrays in the experiment) while the ‘DrugMedia\_’ file contains information about the media that the cell array is printed to. Together they encapsulate and define the experimental design. The QHTCPImageFolders and ‘MasterPlateFiles’ folder are the inputs for image analysis with EASY software. As further described below, EASY will automatically generate a ‘Results’ directory (within the ExpJobs/‘ExperimentJob’ folder) with a name that consists of a *system-generated timestamp* and an optional short description provided by the user (**Fig. 2**). The ‘Results’ directory is created and entered, using the “File \>\> New Experiment” dropdown in EASY. Multiple ‘Results’ files may be created (and uniquely named) within an ‘ExperimentJob’ folder. + +1. ‘EASY’- This directory contains the GUI-enabled MATLAB software to accomplish image analysis and growth curve fitting. EASY analyzes Q-HTCP image data within an ‘ExperimentJob’ folder (described above; each cell array has its own folder containing its entire time series of images). 
EASY analysis produces image quantification data and growth curve fitting results for each cell array; these results are subsequently assembled into a single file and labeled, using information contained in the ‘MasterPlate\_’ and ‘DrugMedia\_’ files in the ‘MasterPlateFiles’ subdirectory. The final files (named ‘\!\!ResultsStd\_.txt’ or ‘\!\!ResultsELr\_.txt’) are produced in a subdirectory that EASY creates within the ‘ExperimentJob’ folder, named ‘/Results*TimeStampDesc*/PrintResults’ (**Fig. 2**). The /EASY directory is simply where the latest EASY version resides (additional versions in development or legacy versions may also be stored there). Note: The raw data inputs and result outputs for EASY are kept in the ‘ExpJobs’ directory. EASY also outputs a ‘.mat’ file that is stored in the ‘matResults’ folder and is named with the TimeStamp and user-provided name appended to the ‘Results’ folder name when ‘New Experiment’ is executed from the ‘File’ Dropdown menu in the EASY console. +1. ‘EZview’- This directory contains the GUI-enabled MATLAB software to conveniently and efficiently mine the raw cell array image data for a Q-HTCP experiment. It takes the Results ‘.mat’ file (created by EASY software) as an input and permits the user to navigate through the raw image data and growth curve results for the experiment. The /EZview directory provides a place for storing the latest EZview version (as well as other EZview versions). EZview provides a GUI for examining the EASY results as provided in the …/matResults/… .mat file. +1. ‘StudiesQHTCP’ \- A software composite (MATLAB, Java, R, Python, Perl, Shell) that takes growth curve results (created by EASY software) as an input and successively generates interaction Z-score results, which are used for graphing gene interactions, clustering, Gene Ontology analysis, and other ways of interpreting and visualizing the experimental quality and outcomes. 
{The /StudiesQHTCP folder contains the ordered command line scripts that call sets of other scripts to perform data selection and adaptation from the extracted text results spreadsheet found in the /ExpJobs/*experiment name*/Results…/PrintResults/ folder. In particular the ‘*user customize interactionCode4experiment*.R’ file. It also contains a multitude of R generated plots based on the selected data and possible adaptation. All clustering and Gene ontology analysis are derived from the ‘ZScores\_Interaction.csv’ file found in the/ZScores subdirectory.} +1. ‘Master Plates’ \- This optional folder is a convenient place to store copies of the ‘MasterPlate\_’ and a ‘DrugMedia\_’ file templates, along with previously used files that may have been modified and could be reused or further modified to enable future analyses. These two file types are required in the ‘MasterPlateFiles’ folder, which catalogs experimental information specific to individual Jobs in the ExpJobs folder, as described further below. + + +**ExpJobs** + +1. The ExpJobs folder contains subdirectories, named accordingly for each experiment. Inside the respective experiment directory, folders containing the time series for each cell array that is part of the experiment are named, numerically, ‘1’, ‘2’, …. There should be one folder for each cell array (these are generated at the image collection stage). The images are provided for this Q-HTCP study, which consists of two experiments. In addition to the image folders, each experiment will contain a subdirectory, named ‘MasterPlateFiles’, which must contain two files inside: one is the ‘MasterPlate\_’, and the other is the ‘Drugmedia\_.csv’ file custom names can be appended after the underscore if desired. + +**EASY** + +1. 
Architecture of the /EASY Subdirectory: + +/EASY + + /figs + + /PTmats + + datatipp.m + + DgenNoGrowthResults200809.m + + DMPexcel2mat\_2023winLinix.m + + EASYconsole.fig + + EASYconsole.m + + NCdisplayGui.m + + NCfitImCFparforFailGbl2.m + + NCscurImCF\_3parfor.m + + NCsingleDisplay.m + + NIcircle.m + + NImParamRadiusGui.m + + NIscanIntensBGpar4GblFnc.m + + p4loop8c.m + + par4Gbl\_Main8c.m + + par4GblFnc8c.m + + + +1. To analyze a new Q-HTCP experiment: +1. **Open the EASY Software**. + 1. Open ‘EstartConsole.m’ with MATLAB + 1. Click the Run icon (play button) + 1. When prompted, click “Change Folder” (do not select “Add to Path”). + 1. In the pop-up display, select from the ‘File’ dropdown: ‘New Experiment’. From the pop-up, choose where to save the new file. Navigate to the relevant job in the ExpJobs folder, name the file accordingly, and click ‘save’. The newly created .mat file in the newly created Results folder will automatically be loaded. The file name will then be automatically appended by the code with the current date information (e.g. ‘A1.mat’ will become ‘Results2023-07-19A1) + 1. If the experiment has already been created, it can be reloaded by clicking ‘Load Experiment’ instead of ‘New Experiment’ and selecting the relevant results + 1. Next, in the pop-up display, click on the ‘Run’ dropdown menu and select ‘Image CurveFit ComboAnalysis’. + 1. In the updated pop-up, choose/highlight all desired image folders for analysis (this is generally all of the folders, since only the ones that need analysis should be there) and then click on ‘continue’. As the program is running, updates will periodically appear in the Command Window; there will be an initial pause at “Before call to NIscanIntens…..”. + 1. When the curve fitting is finished, the EASY console will pop back up. Check to see the completed analysis results in the newly created ‘PrintResults’ Folder, inside of the ‘Results’ Folder. 
Other folders (‘CFfigs’, ‘figs’, ‘Fotos’) are created for later optional use and will be empty. **\*\*NOTE:** The image analysis is completed independently of labeling the data (strains, media type, etc.). Labeling happens next with the ‘GenReports’ function. + 1. Next, click on the ‘GenReports’ dropdown and select ‘DrugMediaMP Generate .mat’ + 1. **\*\*NOTE:** The ‘MasterPlate’ and ‘DrugMedia’ files have very specific formats and should be completed from a template. Additionally, the ‘MasterPlate’ file must be exact (it must contain all and only the strains that were actually tested). For example, if only part of a library is tested, the complete library file must be modified to remove irrelevant strains. + 1. You will be prompted to first select the ‘MasterPlate’ file. You will need to *navigate* away from the working directory to get to it. It is fine for the ‘MasterPlate\_’ file to be .xlsx (or .xls), and if you don’t see it in the popup window, then change the file type from ‘.xls’ to “all files” and then select it. Once it is selected, a report of the number of master plates in the file will pop up; when the report appears, assuming it is correct, click on ‘OK’. + 1. You will then be prompted to select the ‘DrugMedia’ file from the relevant job folder. You will automatically return to the correct prior directory location. Choose it and click ‘OK’. You may see a warning about column headers being modified, but that’s ok. + 1. This will create an additional file in the ‘MasterPlateFiles’ folder named ‘MPDMmat.mat’ + 1. Finally, click on the ‘GenReports’ dropdown and select ‘Results\_Generate’. + + + + 1. You will first see ‘\!\!ResultsElr\_.txt’ generated in the ‘PrintResults’ folder. Refreshing will reveal an increasing file size until you see the ‘\!\!ResultsStd\_.txt’ being generated. When finished, the ‘\!\!ResultsStd\_.txt’ will be about the same file size and it should be used in the following StudiesQHTCP analysis. + + 1. 
‘NoGrowth\_.txt’, and ‘GrowthOnly\_.txt’ files will be generated in the ‘PrintResults’ folder. + +**System for Multi-QHTCP-Experiment Gene Interaction Profiling Analysis** + +1. **Introductory Remarks** + +“StudiesQHTCP” is a program that incorporates several command line scripts and provides a directory structure for input and output files. + +The analysis system involves Sean Santos’ R code for calculating genetic interaction values and z-scores, clustering of gene interaction z-scores using Recursive Expectation-Maximization clustering (REMc) which relies on WEKA and Java implementation, Go Term Finder (GTF) analyses of the REMc clusters which uses python. Jingu Guo worked on REMc and GTF code and Remy Cron incorporated it into a Java ‘.jar’ file to make it possible to run by multiple users from a shared folder. The executable ‘.jar’ files and all associated Python, Perl, and R scripts are executed via a single master shell script, *REMcMaster3.sh*. \[See section IV.7\] + +1. **System Requirements (software/packages necessary to run StudiesQHTCP)** + 1. Software \- These can all be downloaded from the respective online platforms for each operating system + 1. R + 1. Perl + 1. Java + 1. MATLAB + 1. Packages \- These packages must be installed in a specific order to ensure proper installation. + + **For MacOS:** It is recommended that MacOS users download Homebrew for easy installation of the following packages. The command prompt to download Homebrew followed by the prompts to download the necessary packages are listed below. 
+export HOMEBREW\_BREW\_GIT\_REMOTE=[https://github.com/Homebrew/brew](https://github.com/Homebrew/brew) + +/bin/bash \-c "$(curl \-fsSL [https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh](https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh))" + +sudo cpan File::Map +sudo cpan ExtUtils::PkgConfig +sudo cpan GD +brew install graphviz +brew install gd +sudo cpan GO::TermFinder +brew install pdftk-java +brew install pandoc + + **For Linux:** The package manager commands used below are for Debian-based distributions. + +If using Fedora or CentOS, you may need to use ‘dnf’ or ‘yum’ in place of ‘apt-get’ + + sudo cpan File::Map +sudo cpan ExtUtils::PkgConfig +sudo cpan GD +sudo apt-get install graphviz +sudo apt-get install libgd-dev +sudo cpan GO::TermFinder +sudo apt-get install pdftk-java +sudo apt-get install pandoc + +**For R:** + install.packages("BiocManager") + BiocManager::install("org.Sc.sgd.db") + install.packages('ontologyIndex', dep=TRUE) +install.packages('ggrepel', dep=TRUE) +install.packages('tidyverse', dep=TRUE) +install.packages('sos', dep=TRUE) +install.packages('openxlsx', dep=TRUE) + + + +1. 
**Proper Architecture of Beginning Subdirectories** + +**/StudiesQHTCP** + +**StudiesDataArchive.txt** + +**/ExpStudy** (user named) + +**/A\_QHTCP Study Design and Notes** + +**/Code** + +22\_0602\_Remy\_DAmPsList.txt + +All\_SGD\_GOTerms\_for\_QHTCPtk.csv + +All\_SGD\_GOTerms.csv + +**/devStuff** + +InteractTemplateB4fixes.R + +InteractTemplateB4Prompt4SDinput.R + +gene\_association.sgd + +gene\_ontology\_edit.obo + +go\_terms.tab + +GTAtemplate.R + +ORFs\_w\_DAmP\_list.txt + +PairwiseLK.R + +Parameters.csv + +**/ScriptTemplates** {preserves starting templates of code modified by user} + +**/BU\_Legacy** + + InteractTemplate.R + +Concatenate\_GTF\_results.py + +Concatenate\_GTF\_resultsB4REMcMaster2.py + +GTAtemplate.R + +InteractionTemplate230119.R + +JoinInteractExps.R + +JoinInteractExps3dev.R + +PairwiseK\_lbl.r + +PairwiseL\_lbl.R + +PairwiseLK.R + +Remy\_yor\_dF\_correlation\_study.R + +TSHeatmaps5dev2.R + +SGD\_features.tab + +SGD\_features.tab.txt + +**/Sscripts** + +18\_0205\_heatmaps\_zscores\_2SD\_color\_ARem\_Z\_lm.R + +22\_0603\_Remy\_Exlcude\_DAmPs.R + +cmd\_Doxo\_SumZScore\_Z\_lm\_Interaction\_d...alidationedit.R + +cmd\_ScoreAllGOTerms\_From\_Z\_lm\_V2.R + +Compare\_GTF\_Averages\_BetweenScreens\_lm\_Kvals\_v2.R + +Compare\_GTF\_Averages\_BetweenScreens\_lm\_Lvals\_v2.R + +Compare\_GTF\_Averages\_BetweenScreens\_lm\_v2.R + +GO\_list\_All\_ChildTerms\_lmZscore\_max100child\_Heatmaps\_3terms\_V2.R + +GO\_list\_All\_ChildTerms\_lmZscore\_max100child\_Heatmaps\_4terms\_aging.R + +GO\_list\_All\_ChildTerms\_lmZscore\_max100child\_Heatmaps\_4terms\_V2.R + +GO\_list\_All\_ChildTerms\_lmZscore\_max100child\_Heatmaps\_5terms\_V2.R + +GO\_list\_All\_ChildTerms\_lmZscore\_max100child\_Heatmaps\_V2.R + +ScoreAllGOTerms\_From\_Z\_lm\_V2.R + +StudyInfo.csv + +TSHeatmaps5dev2.R + +**/Documentation** + + **\*\*\*ADD IN SEAN’S MANUAL\*\*\*** + +Jingyu\_REMc\_Instruction for clustering and...2013Mar.docx + +**/LegacyDocs** + +QHTCP Analysis SystemRev2.docx + +QHTCP 
Analysis SystemRev2a.docx + +QHTCP Analysis SystemRev2b.docx + +QHTCP Analysis SystemRev2b0.docx + +QHTCP Analysis SystemRev2c.docx + +**/Exp1** + +**/backups** + +InteractTemplateB4Prompt4SDinput.R + +ExpFrontend.m + +Z\_InteractionTemplate.R + +Notes Exp1 + +**/ZScores** + +**/Exp2** + +**/backups** + +InteractTemplateB4Prompt4SDinput.R + +ExpFrontend.m + +Z\_InteractionTemplate.R + +Notes Exp2 + +**/ZScores** + +**/Exp3** + +**/backups** + +InteractTemplateB4Prompt4SDinput.R + +ExpFrontend.m + +Z\_InteractionTemplate.R + +Notes Exp3 + +**/ZScores** + +**/Exp4** + +**/backups** + +InteractTemplateB4Prompt4SDinput.R + +ExpFrontend.m + +Z\_InteractionTemplate.R + +Notes Exp4 + +**/ZScores** + +/**GTAresults** + +**/Exp1** + +**/Exp2** + +**/Exp3** + +**/Exp4** + +**/REMc** + +AddShiftVals2.R + +DconJG2.py + +GeneByGOAttributeMatrix\_nofiltering-2009Dec07.tab + +**/GTF** + +analyze\_v2.pl + +concatenate\_GTF\_Results.py + +gene\_association.sgd + +gene\_ontology\_edi.obd + +GOontologyPar.sh + +SeanEmailPython2 + +SGD\_features.tab + +SGD\_features.tab.txt + +Terms2tsv\_v4.pl + +**/Component** + +analyze\_v2.pl + +concatenate\_GTF\_Results.py + +gene\_association.sgd + +gene\_ontology\_edi.obd + +ORF\_List\_DAmPs\_Only.txt + +ORF\_List\_Without\_DAmPs.txt + +ORFs\_w\_DAmP\_list.txt + +SGD\_features.tab + +SGD\_features.tab.txt + +terms2tsv\_v4.pl + +**/Function** + +analyze\_v2.pl + +concatenate\_GTF\_Results.py + +gene\_association.sgd + +gene\_ontology\_edi.obd + +ORF\_List\_DAmPs\_Only.txt + +ORF\_List\_Without\_DAmPs.txt + +ORFs\_w\_DAmP\_list.txt + +SGD\_features.tab + +SGD\_features.tab.txt + +terms2tsv\_v4.pl + +**/Process** + +analyze\_v2.pl + +concatenate\_GTF\_Results.py + +gene\_association.sgd + +gene\_ontology\_edi.obd + +ORF\_List\_DAmPs\_Only.txt + +ORF\_List\_Without\_DAmPs.txt + +ORFs\_w\_DAmP\_list.txt + +SGD\_features.tab + +SGD\_features.tab.txt + +terms2tsv\_v4.pl + +jingyuJava\_1\_7\_extractLib.jar + +JoinInteractExps3dev.R + +mComponent.sh + 
+mFunction.sh + +mProcess.sh + +Notes/ REMc, GTF\_Ontologies and Associated\_Heatmaps + +ORF\_List\_DAmPs\_Only.txt + +ORF\_List\_Without\_DAmPs.txt + +ORFs\_w\_DAmP\_list.txt + +**/REMcHeatmaps** + +**/REMcHeatmapsWithHomolgy** + +17\_0503\_DAmPs\_Only.txt + +**/Homology** + +REMcHeatmaps\_Z\_lm\_wDAmPs\_andHomology\_221212.R + +Yeast\_Human\_Homology\_Mapping\_biomaRt\_18\_0902.csv + +REMcJar2.sh + +REMcJar2old.sh + +REMcMaster2.sh + +REMcMaster3.sh + +**/TermSpecificHeatmaps** + +{Note: The TSHeatmaps… .R contains a \*\*Table\*\* section near the start where is a default set of tables. If the user wishes to use different tables, i.e. (All\_SGD\_GOTerms\_for\_... .csv) that should be modified and the TSH… . R script relabeled to reflect user modification and that is included in the/Code section. Users should always write notes related to code modifications and study goals-strategies. + +**/test-DevStuff** + +Int4DoxGem.R + +InteractionTemplate230119cutdown4compareSSV6.R + +REMcMaster2Bad.sh + +As stated earlier, the user can add folders to back up temporary results, study-related notes, or other related work. However, it is advised to set up and use separate STUDIES when evaluating differing data sets whether that is from experiment results files or from differing data selections in the first interaction … .R script stage. This reduces confusion at the time of the study and especially for those reviewing study analysis in the future. + +1. **How-To Procedure: Execute a Multi-experiment Study** + +To begin, consider the goals of the study and design a strategy of experiments to include in the study. Consider the quality of the experiment runs using EZview to see if there are systematic problems that are readily detectable. In some cases, one may wish to design a ‘pilot’ study for discovery purposes. There is no problem doing that, just take a template study, copy and rename it as XYZpilotStudy etc. 
However, careful examination of the experimental results using EZview will likely save time in the long run. One may be able to relatively quickly run the interaction Z scores (the main challenge there is the user creation of customized interaction… .R code. I have tried to simplify this by locating the user edits near the top). + +**Preliminary Task** + +1. **Copy the Template directory structure and rename it for your study**. + 1. This directory contains the structure and code for analyzing a multi-experiment study. It contains the code templates and other reference files called by the scripts. + +The user specifies the arrangement of the data (in ‘**StudyInfo.csv**’) by assigning it to /Exp1, /Exp2, /Exp3, or Exp4, which is particularly relevant for clustering as results will be ordered left to right according to experiment number. + +A utility (**ExpFrontend.m**) was made for recording into a spreadsheet (‘**StudiesDataArchive.txt**’) the date and files used (i.e., directory paths to the \!\!Results files used as input for Z-interaction script) for each multi-experiment study. + +**Experiment Specific Interaction Zscores generation** + +**2\. In your files directory, open the /Code folder, edit the ‘StudyInfo.csv’ file** + +1. Enter the desired Experiment names- ***\*\***order the names in the way you want them to appear in the REMc heatmaps; and make sure to run the front end programs (below) in the correct order (e.g., run front end in ‘exp1’ folder to call the \!\!Results file for the experiment you named as exp1 in the StudyInfo.csv file)* + 1. The GTA and pairwise, TSHeatmaps, JoinInteractions and GTF Heatmap scripts use this table to label results and heatmaps in a meaningful way for the user and others. The BackgroundSD and ZscoreJoinSD fields will be filled automatically according to user specifications, at a later step in the QHTCP study process. + + + **3\. 
Open MATLAB and in the application navigate to each specific /Exp folder, call and execute ExpFrontend.m by *clicking the play icon*. \*\*Use the “Open file” function from within Matlab; do not ‘double click’ on the file from the directory. When prompted, navigate *to the ExpJobs folder and the PrintResults folder within the correct job folder.* Repeat this for every Exp\# folder depending on how many experiments are being performed. The Exp\# folder must correspond to the StudyInfo.csv created above.** + + **Note: Before doing this, it’s a good idea to compare the ref and non-ref CPP average and median values. If they are not approximately equal, then may be helpful to standardize Ref values to the measures of central tendency of the Non-refs, because the Ref CPPs are used for the z-scores, which should be centered around zero.** + 1. This script will copy the \!\!ResultsStd file (located in /PrintResults in the relevant job folder in /ExpJobs ***\*\***rename this \!\!Results file before running front end; we normally use the ‘STD’ (not the ‘ELR’ file)* chosen to the Exp\# directory as can be seen in the “Current Folder” column in MATLAB, and it updates ‘StudiesDataArchive.txt’ file that resides in the /StudiesQHTCP folder. ‘StudiesDataArchive.txt’ is a log of file paths used for different studies, including timestamps. + +Do this to document the names, dates and paths of all the studies and experiment data used in each study. Note, one should only have a single ‘\!\!Results…’ file for each **/Exp**\_ to prevent ambiguity and confusion. If you decide to use a new or different ‘\!\!Results…’ sheet from what was used in a previous “QHTCP Study”, remove the one not being used. NOTE: if you copy a ‘\!\!Results…’ file in by hand, it will not be recorded in the **‘StudiesDataArchive.txt’** file and so will not be documented for future reference. 
If you use the ExpFrontend.m utility it will append the new source for the raw \!\!Results… to the **‘StudiesDataArchive.txt’** file. + +As stated above, it is advantageous to think about the comparisons one wishes to make so as to order the experiments in a rational way as it relates to the presentation of plots. That is, which results from sheets and selected **‘interaction … .R’**, user modified script, is used in /Exp1, Exp2, Exp3 and Exp4 as explained in the following section. + +**4\. In each /Exp\# folder, rename the Z\_InteractionTemplate.R script according to the experiment focus** + +1. Example: Interaction, Experimenter Initials, Experiment Focus \--\> ‘int\_RM\_2PE.R’ + **5\. Open the renamed interaction script, and edit each one beginning at the ‘++BEGIN USER DATA SELECTION++’** +1. This is designed so that the data of interest for each experiment is appropriately selected from the \!\!Results…txt file +1. The user can edit, step through, and test the R script without running through the whole routine by observing the resultant data table created in RStudio. + 1. The **Z\_InteractionTemplate.R** script has a collection of code lines that have been used for prior analyses (generally to select data from various \!\!Results…txt files), which may be commented out (if not relevant), reused as needed, and/or modified for a new study. These include lines associated with the removal of ‘dAmps’, specific concentrations, and items described in the ‘Specifics’ and ‘Media’, i.e., information specific to a particular experiment design. There are also code lines to replace gene names ‘OCT1/YKL134C’ /’MAY24/YPR153W’ and that get converted to date format in excel, by using only the ORF name and to remove data rows with ‘Blank’ listed; these lines of code convenient to reuse. Hopefully, these code lines can be used, commented out, or adapted to aid the user in modifying this section to the specific data requirements of the study. 
As a new user data filter code is developed for each ‘Study’ (and vetted), those lines can be added to the InteractionTemplate230119.R code in the **/StudyTemplate** folders to aid in future studies. + + **6\. Open a terminal, navigate to each /Exp\# folder, and execute the (customized) ‘Z\_InteractionTemplate\_…” script by using the command line below:** + + + + + +**Rscript RenamedInteractionTemplate.R \\\!\\\!Results… .txt** + +**\*\*need to change wording to choose SD of Delta\_Background to exclude Data from analysis.** +**\[1\] "Be sure to enter Background noise filter standard deviation i.e., 3 or 5 per Sean"** +**Enter a Standard Deviation value to noise filter \>\>** + +**\[1\] Enter Standard deviation value for removing data for cultures due to high background (e.g., contaminated cultures). Generally set this very high (e.g., ‘20’) on the first run in order NOT to remove data, e.g. ‘20’. Review QC data and inspect raw image data to decide if it is desirable to remove data, and then rerun analysis.** +**Enter a Background SD threshold for EXCLUDING culture data from further analysis:** + +1. The script will request for the user to input a ‘Background Standard Deviation Value’. This Background value removes data where there is high pixel intensity in the background regions of a spot culture (i.e., suspected contamination). 5 is a minimum recommended value, because lower values result in more data being removed, and often times this is undesirable if contamination occurs late after the carrying capacity of the yeast culture is reached. This is most often “trial and error”, meaning there is a ‘Frequency\_Delta\_Background.pdf’ report in the /Exp\_/ZScores/QC/ folder to evaluate whether the chosen value was suitable (and if not the analysis can simply be rerun with a more optimal choice). In general, err on the high side, with BSD of 10 or 12…. 
One can also use EZview to examine the raw images and individual cultures potentially included/excluded as a consequence of the selected value. Background values are reported in the results sheet and so could also be analyzed there. + + **(For new terminal users, directory navigation tips are described below)** + +1. To navigate to the directory one can use the directory GUI (in X2Go, use the GUI to navigate to desired operating directory and then from the ‘File’ menu, choose “Open in Terminal”) +1. Alternatively, navigate there through the terminal window: ‘pwd’ “prints the current working directory”, ‘ls’ “lists” the subfolders in the current directory. ‘cd’ followed by the name of the ‘subdirectory’ will move down into it. ‘cd ..’ changes to the parent directory +1. The tab key can be used to autofill unique characters after typing the initial letters of a folder or file you wish to call. + +The template structure above assists the user with organization and management of Q-HTCP files and provides a uniform directory structure to streamline reference across different users and experiments. + +Since we are systematically comparing perturbations, most Q-HTCP studies will consist of either 2 or 4 experiment subfolders. + +The Zscores files are used for subsequent analyses, including REMc, GTA and Term Specific Heatmaps. These further analyses are described below and can be completed in any order and/or concurrently from separate terminals. + +***\*\*Annotate Files produced and comment out code that produces files that are obsolete or clutter.*** + +**REMc** + +**7\. Navigate to the /REMc directory and run the following shell script:** + +**\[jwrodger@hartmanlab REMc\]$ sh REMcMaster3.sh** + +1. There is a single shell script which will execute a series of shell script commands that were previously executed individually. To execute, open a terminal in the …/REMc folder and type the command shown above. +1. 
The command line will request the user to enter a standard deviation multiplier (factor) that will filter the ZScore data accordingly for use with REMc. That value will also be stored to the **StudyInfo.csv** file where the user entered descriptive Labels in at the start of this entire QHTCP study. Those labels are used throughout the process on all the graphics that are produced. + + + +1. The REMcMaster3.sh script will execute the entire process in roughly thirty minutes to possibly an hour. REMcMaster3.sh script tasks are as follows: + 1. Joins the interaction Zscores into a table appropriate for the REMc jar file executable + 1. Execute the REMcJar2.sh which calls the java executable with the appropriate arguments + 1. Add shift columns back to the REMcRdy\_lm\_only.csv-finalTable.csv file to produce the "REMcWithShift.csv" file to be used to produce the REMc Heatmaps task. + 1. Execute **REMcHeatmaps\_zscores.R** contingent upon "REMcWithShift.csv" file being created. + 1. Execute REMcHeatmapsWithHomology/REMcHeatmaps\_Z\_lm\_wDAmPs\_andHomology\_221212.R that is located in …/REMc/REMcHeatmapsWithHomology/REMcHeatmaps\_Z\_lm\_wDAmPs\_andHomology\_221212.R . This is a copy of the R script used by Denver and renamed with all the essential files it needs stored with it. + 1. Execute the process of GTF contingent upon "REMcRdy\_lm\_only.csv-finalTable.csv" being produced by "REMcJar.sh" (Step1). This process involves several tasks as follows: + 1. Execute DconJG2.py to produce the /Process/REMcRdy\_lm\_only directory and files. This is first created in the **../Process** folder and … + 1. Then copied to the **../Function** and **../Component** folders + 1. Next, contingent upon the production the **../REMcRdy\_lm\_only** folder being made, the REMcMaster3.sh script executes the mProcess.sh, mFunction.sh and mComponent.sh tasks within the associated ontology directories. 
These ontology scripts call the Perl utilities and arguments from the respective ontology folders. These ontology scripts also execute the Concatenate\_GTF\_results.py script to produce the /Process/ProcessResults.txt, /Function/FunctionResults.txt and /Component/ComponentResults.txt output files. + 1. These files are concatenated with a Linux utility: + **Pdftk REMcHeatmaps/\*.pdf \< output path-filename.pdf\>** + +***\*\*Annotate Files produced and comment out code that produces files that are obsolete or clutter*** + +**GTA related work** + +**8\. Navigate to the Code directory and open a terminal to run the following Rscript to produce the GTA results for each Exp\#:** + +**\[jwrodger@hartmanlab Code\]$ Rscript GTAtemplate.R** + +1. It will create the **/GTAresults/Exp\#** directories for the number of experiments for which you have produced Zscore\_interactions.csv and populate them with output files. The script ‘knows’ where to find the interaction files and where to put the results. + + + +**9\. Still in the /Code directory, run the following Rscript, entering two Exp\# files as input arguments to compare:** + +**\[jwrodger@hartmanlab Code\]$ Rscript PairwiseLK.R Exp1 Exp2** + +1. This script will perform both L and K comparisons for the specified experiment folders. Note this could just as easily have been Exp3 and Exp4 or even Exp1 and Exp3. The script ‘knows’ how to label the results as it has the StudyInfo.txt table to assign your labeling convention to the results. The code uses the naming convention of PairwiseCompare\_Exp’\#’-Exp’\#’ to standardize and keep simple the structural naming (where ‘X’ is either K or L and ‘Y’ is the number of the experiment GTA results to be found in **../GTAresult/Exp**\_). The GTA analysis is now complete. {FYI There are also individual scripts that just do the ‘L’ or ‘K’ pairwise studies in the ../Code folder.} + + **Term Specific Heatmaps Production** + +**10\. 
Navigate to the /Code directory and run the following Rscript to produce the Term Specific Heatmaps:** + +**\[jwrodger@hartmanlab Code\]$ Rscript TSHeatmaps5dev2.R** + +1. The Term Specific Heatmaps are produced directly from the + **../ExpStudy/Exp\_/ZScores/ZScores**\_Interaction.csv file generated by the user modified interaction… .R script. The heatmap labeling is per the names the user wrote into the StudyInfo.csv spreadsheet. +1. Verify that the All\_SGD\_GOTerms\_for\_QHTCPtk.csv found in **../Code** is what you wish to use or if you wish to use a custom modified version. If you wish to use a custom modified version, create it and modify the TSHeatmaps template script (TSHeatmaps5dev2.R) and save it as a ‘TSH\_study specific name’. + + + +**\*\*Naming of ‘StudiesQHTCP/Study/output’ files.** The resulting files produced in StudiesQHTCP folders have standard file names, which will be the same initially, across all studies. However, when the analysis is complete, it may be desirable to move some of the results files outside of their native directories, and it is therefore useful to give them unique and recognizable names. Descriptive names can be added to all files by running two scripts from a terminal after navigating to the corresponding code directory: + +i. “sh RenameZscores\_GTAresults.sh” will add names provided in ‘StudyInfo.csv’ to files in the ‘Zscores’ subdirectory of the respective ‘Exp’ folder and to files in the ‘GTAresults’ folder. + +ii. “sh RenameREMcHtmaps\_GTFfiles.sh” will append the label given by the user when prompted to files in the ‘REMc’ and ‘TermSpecificHeatmaps’ folders. + +[https://weka.sourceforge.io/doc.dev/weka/clusterers/RandomizableDensityBasedClusterer.html](https://weka.sourceforge.io/doc.dev/weka/clusterers/RandomizableDensityBasedClusterer.html) + +\#setSeed-int- the above link is relevant to how REMc results are always the same (presumably because seed selection is non-random). 
+ +**Questions to address / notes to incorporate here or elsewhere:** + +**We need full documentation for all of the current workflow. There are different documents that need to be integrated. This will need to be updated as we make improvements to the system.** + +**In Easy \-** +**MasterPlate\_ file must have ydl227c in orf column, or else Z\_interaction.R will fail, because it can’t calculate shift values.** +**Make sure there are no special characters; e.g., (), “, ‘, ?, etc.; dash and underscore are ok as delimiters** +**Drug\_Media\_ file must have a letter character to be read as ‘text’.** + +**MasterPlate\_ file and DrugMedia\_ are .xlsx or .xls, but \!\!Results\_ is .txt.** + +**In Z\_interactions.R, does it require a zero concentration/perturbation (should we use zero for the low conc, even if it’s not zero), e.g., in order to do the shift correctly?** + +**Need to enable all file types** (not only .xls) as the default for GenerateResults (to select MP and DM files as .xlsx). + +Explore differences between the ELR and STD files \- 24\_0414; John R modified Z script to format ELR file for Z\_interactions.R analysis. + +To keep time stamps when transferring with FileZilla, go to the transfer drop down and turn it on, see [https://filezillapro.com/docs/v3/advanced/preserve-timestamps/](https://filezillapro.com/docs/v3/advanced/preserve-timestamps/) + +Could we change the ‘MasterPlateFiles’ folder label in EASY to ‘MasterPlate\_DrugMedia’ (since there should be only one MP and there is also a DM file required)? 
+ +I was also thinking of adding a ‘MasterPlateFilesOnly’ folder to the QHTCP directory template where one could house different MPFiles (e.g., with and without damps, with and without Refs on all MPs, etc; other custom MPFiles, updated versions, etc) + +Currently updated files are in ‘23\_1011\_NewUpdatedMasterPlate\_Files’ on Mac (yeast strains/23\_0914…/) + +For EASY to report cell array positions (plate\_row\_column) to facilitate analyzing plate artifacts. The MP File in Col 3 is called ‘LibraryLocation’ and is reported after ‘Specifics’ in the \!\!Results. + +Can EASY/StudiesQ-HTCP be updated at any time by rerunning with updated MP file (new information for gene, desc, etc)- or maybe better to always start with a new template? + +Need to be aware of file formatting to avoid dates (e.g., with gene names like MAY24, OCT1, etc, and with plate locations 1E1, 1E2, etc)- this has been less of a problem. + +In StudiesQHTCP folders, remember to annotate Exp1, Exp2, in the StudyInfo.csv file. + +Where are gene names called from for labeling REMc heatmaps, TSHeatmaps, Z-interaction graphs, etc? Is this file in the QHTCP ‘code’ folder, or is it in the results file (and thus ultimately the MP file)? + +Is it ok for a MasterPlate\_ file to have multiple sheets (e.g., readme tab- is only the first tab read in)? +What are the rules for pulling information from the MasterPlateFile to the \!\!Results\_ (e.g., is it the column or the Header Name, etc that is searched? Particular cells in the DrugMedia file?). + +Modifier, Conc are from DM sheet, and refer to the agar media arrays. OrfRep is from MasterPlate\_ File. ‘Specifics’ (Last Column) is experiment specific and accommodates designs involving differences across the multi-well liquid arrays. ‘StrainBkGrd’ (now ‘Library location’) is in the 3rd column and reported after ‘Specifics’ at the last col of the ‘\!\!Results..’ file. 
+ +Do we have / could we make an indicator- work in progress or idle/complete with MP/DM and after gen-report. Now, we can check for the MPDMmat.mat file, or we can look in PrintResults, but would be nice to know without looking there. + +File\>\>Load Experiment wasn’t working (no popup to redirect). Check this again. + +**In EZview**: + +What do the File, Parameters and Tools dropdown menu items do? +What is the ‘Hide’ button for? +What is the ‘composite’ overlay good for? +What is the file that is used for the ‘Info’ function above the Gene Directory. +how to wand over labels \- how does that work in matlab? + +**In StudiesQHTCP:** + +**For front end, be more specific about where to navigate to find results file.** + +**ELR type file errors out \- needs to be produced in a compatible format.** + +**\*\*change wording to “choose SD for Delta\_Background to exclude spot culture growth curve data from interaction analysis”.** + +**GTF:** +Limit to smaller terms. +Enable sort by term size. + +There needs to be an annotated set of MasterPlate File templates. These could be numbered and annotated chronologically, and each experiment could specify which instance of the MP file template is used. When possible/ if necessary, the folders of plate images should be reordered rather than reordering the MP file. Each Exp, should have an ExpDesc spreadsheet in it indicating the Exp Design (summarizing what is expected in the \!\!Results file), based on the ‘MasterPlate\_’ and ‘DrugMedia\_’ files. + +Need to add Ref to Blank positions in the new library construction. + +In EZview: + +John R made a version that runs the original Guide, the ‘exported’, or the ‘migrated’ Forms of the program. A variety of versions didn’t work very well. The original program, with some improvements, seems to work the best. We should try to optimize it. 
+ +{AppDesigner + /mnt/data/EZview/EZview2023/EZviewDev23\_0921POSadaptedOnM4800\_wlapp +Use EZvStartup which calls EZviewGui\_7.mlapp + +GUIDE +/mnt/data/EZview/EZview2023/EZviewDev23\_0919POScleanup4Pub\_MigrationWorkingFileExport\_wlapp +Use the standard EZviewGui.m to start execution} + +Update from John R: +“There are four EZstartup \----.m files. +EZvStartup.m \-Guide version +EZvStartup\_Export.m \-Exported file migration version +EZvStartup\_mlappLaptop.m \-M4800 sized Laptop version +EZvStartup\_mlappServer.m \-Server sized + +You can try them out at your convenience. +I have obviously not tried them out on a Mac Laptop. +Extra files etc. still there. It's a hack and chop job but it seems to work. +Location: +/mnt/data/EZview/EZview2023/EZviewDev23\_1004POSadaptedOnM4800\_wlapp” + +**Suggestions to improve EZview appearance:** +Fix heatmap dimensions to match image dimensions. Is it possible to enable the user to adjust the heatmap dimensions (e.g. by dragging edges or corners to resize its window)? +Have an option to use a fixed heatmap scale across an experiment. +Check chronological experiments. +What does “SpotView” button do? +Can we add scrollbar to RFtab popup window so that it can be resized without losing view of table? + +For **StudiesQ-HTCP**, For GTF, we need to make sure that the correct ORFpool (e.g., with or without damps) is being used. Can that be a selection step in the code, or an additional step to include it in the code (try, etc). + +The Library Locations for E rows in the MasterPlateFiles are being converted to exponentials in the \!\!Results files- needs to be text. One idea is to convert to text and/or use a delimiter. If they have a delimiter, perhaps prefix of ‘mp’ (converting to text) is not needed? e.g., ‘1\_E1’ instead of ‘mp1E1’? + +For TermSpecificHeatMaps, which list of GOterms did we use (see Sean’s manual, p. 14; ‘1.3 Term Specific Heatmaps’). Maybe we need a shorter, or more dedicated list. 
+ +Compare our GTF to that of YeastMine to check for ‘correctness’ of updated files? + +In Studies QHTCP new template, edit the StudyInfo.csv on the server in Libre, but leave it as a single column (or choose to open it with comma delimiter) and edit between the commas. But don’t convert in Excel (text to columns \>\> resave), since this breaks the .csv format and code won’t run anymore and gives a data frame error after the STDEV for background. + +\*\*Consider updating Z\_InteractionTemplate.R in the Studies\_QHTCP template folder if modifications are made for a particular study that could be useful for additional future studies. The idea would be to comment out the study-specific modifications and overwrite the existing program. + +StudiesQHTCP: +RF z-interaction plots don’t include RF2; RF1 only? +We may want to set different z-score cutoffs, based on the shape of the rank plot. +We want to calculate mean and median CPPs for Refs and Non-Ref cultures. May want to adjust REF data so that the median CPP values for Ref cultures are the same as or close to those of the Non-Ref cultures. +We should regress through the origin for the z-score interaction fitting. +Define NG(no growth), DB(?) and SM(?) on InteractionPlots. + +In MPfile\_templates, replace all YKL227C with YDL227C (120 instances); may only be in the file with Refs added to MPs. + +Update gene by go matrix in REMc (from Dec07 2009\) folder of StudiesQHTCP. + +The ‘FrontEnd’ popup message when it is played should say “Select the \!\!Results File (in ‘ExpJobs’ folder)” to avoid confusion / remind about QHTCP structure. + +In REMcMaster3.sh, change prompt to ask for Z-score value (not standard deviation) for filtering analysis. + +Can REMc cluster Aniyia’s data (extract names in place of gene names)? Does it fail because it can’t do GTF, even though it should be able to simply cluster the Int\_Z-scores and label heatmaps (i.e., do this without doing GTF)? 
+ +Also, provide more detailed message when prompted to enter background level in Z\_IntR. + +Output Z\_scoreInt file in same order as InteractionPlots. + +Check all folders in template for updated files; e.g., not just the ‘code’ folder, but also Exp1/2/3/4, REMc and GTF, etc. + +For EASY, need notification for successful completion after selecting drug media file with prompt ‘labeling complete, you may now generate report’. + +REMc error (BMHonly run): +![][image1] + +REMc- include LibraryLocation info in FinalTable. +This would come from looking up ‘OrfReplicate’ column in ‘MP File’, not ORF name column. +Useful to have demarcator between MP and WellPos, e.g., mp8\_B24 so that clusters can be analyzed for MP artifacts (more concise preferred format is 8\_B24). + +End of GTA in terminal: +![][image2] + + +Full GTA result BMHonly: +![][image3] +The one pdf that is made can’t be opened. + +When we leave out the DAmPs, we should probably still do the 2nd REF plate. + +I got the same result by running the REMcMaster3.sh whether I used Zscore cutoff of 2 or 1 (2PEonly Experiment). Note: I reran the script in the same folder. + +Need new strategy to check for plate artifacts \- calculate REF averages and Medians and compare to non-REF averages and Medians. If corrections are needed, correct by median since these are less impacted by outlier/tails of distributions. + +Heatmaps change to incorrect ones when the print heatmap function is used. + +Don’t we need a program to remove the template files and keep only the results after StudiesQHTCP? + +What is this step \- what package do we need to openxlsx- what failed as a result of not having it? 
+![][image4] + +**Appendix** + +Notes on the standard EASY coding structure (in Matlab): + +- **/figs**: a folder with two pintool map (‘PT’) files in it +- **/PTmats:** a folder with several .mat parameter files +- **Datatipp.m**: Matlab function to display small text boxes w/ info of a particular data point when the cursor is hovered over it +- **DgenNoGrowthResults200809.m**: script responsible for generating the \!\!Results files(Std & Elr) +- **DMPexcel2mat\_2023winLinix.m**: script that prompts users to select the MasterPlate and DrugMedia files +- **EASYconsole.fig**: This is the figure for the EASYconsole GUIDE. In order to edit the EASYconsole using the GUIDE functionality, enter ‘ guide(“EASYconsole.fig”) ‘ into the command window. Future versions of MATLAB may no longer allow GUIDE editing and APPDESIGNER must be used. +- **EASYconsole.m**: main EASY program script; large center for GUI functionality; created using GUIDE +- **EstartConsole.m**: starter script for EASY; calls EASYconsole.m +- **NCdisplayGui.m**: called when the user accesses the ‘CurveFit Display’ functionality on the console; responsible for the working GUI \[part 1\] +- **NCsingleDisplay.m**: assists in producing a functional GUI when accessing the ‘CurveFit Display’ functionality on the console \[part 2\] +- **NCfitImCFparforFailGbl2.m**: calls NCscurImCF\_3parfor.m; performs curve fitting and data analysis as a set of cultures given their time points and intensity values \>\> produces ‘FitResultsComplete.txt’ in PrintResults folder \[must work in conjunction with ImParamGui.m\] +- **NCscurImCF\_3parfor.m**: part of the computational process when using the ‘Image CurveFit ComboAnalysis’ functionality; performs growth curve fitting using a logistic growth model; performs the curve fit both with and without the early-late-r improved code; collects and consolidates data into structures and that is returned back up through the calling functions +- **NIcircle.m**: generates circular 
boundaries for image processing (are squares used instead?) +- **NImParamRadiusGui.m**: necessary for the ‘Image CurveFit ComboAnalysis’ computations. +- **NIscanIntensBGpar4**: major part of the computations for ‘Image CurveFit ComboAnalysis’ +- **par4Gbl\_Main8c.m**: preallocation of data structures that are passed into descending function calls; calls the parallel processing parfor loop which loops through all the plate imaging folders +- **p4loop8c.m**: calls par4GblFnc8c.m and passes to it preallocated structures and other variables; passes back the analysis data in renamed structures based preallocated structures to calling script par4Gbl\_main8c.m +- **par4GBLFnc8c.m**: calls NIscanIntensBG4par4.m; compiles returned data; calls NCfitImCFparforFailGb12.m passing data obtained with NIscanIntensBG4par4.m to the curve fit function + +**For PinTool Functionality, the following scripts must be in the EASY folder:** +**\-** NdirectPTGui.m +\-NIPTdirectParmsGui.m +\-NIPTsearchParmsGui.m +\-NImapPT.m +\-NImapPTcentA.m +\-NImapPTcentroidSrc.m +\-NImapPTcentroidSrcCirc.m + +Other code Notes: +\>The Toolbar for the EASYconsole is found in line 61 of EASYconsole.m \>\> ‘figure’ \= on; ‘none’ \= off +\>PlateMapPintool button functionality: line 345 in EASYconsole.m + +![][image5] +![][image6] +![][image7] + +[image1]: + +[image2]: + +[image3]: + +[image4]: + +[image5]: + +[image6]: + +[image7]: \ No newline at end of file diff --git a/workflow/docs/QHTCP - Hartman Lab User's Guide.odt b/workflow/docs/QHTCP - Hartman Lab User's Guide.odt new file mode 100644 index 00000000..ef14693b Binary files /dev/null and b/workflow/docs/QHTCP - Hartman Lab User's Guide.odt differ diff --git a/workflow/script-run-workflow b/workflow/qhtcp-workflow similarity index 99% rename from workflow/script-run-workflow rename to workflow/qhtcp-workflow index 6c9ca1e4..8a895b02 100755 --- a/workflow/script-run-workflow +++ b/workflow/qhtcp-workflow @@ -190,14 +190,18 @@ parse_input() { } # 
@section Helper functions -# @arg ALL_MODULES array A list of all available modules (that have been passed to module()) +# @arg $1 array A module to initialize (add to ALL_MODULES) +# @set ALL_MODULES array A list of all available modules # @internal module() { debug "Adding $1 module" ALL_MODULES+=("$1") declare -gA "$1" } -# @arg ALL_SUBMODULES array A list of all available modules (that have been passed to module()) + +# @arg $1 array A submodule to initialize (add to ALL_SUBMODULES) +# @set ALL_SUBMODULES array A list of all available submodules +# @internal submodule() { debug "Adding $1 submodule" ALL_SUBMODULES+=("$1") @@ -535,7 +539,7 @@ easy() { # TODO will need to play with the -sd startup option to see what works (well) # Skip this step altogether in auto mode since it requires graphical interaction if ! ((YES)) && ask "Start EASY in MATLAB? This requires a GUI."; then - export SCANS_DIR PROJECT_DATE EASY_DIR PROJECT_USER PROJECT_PREFIX EASY_SUFFIX EASY_RESULTS_DIR MASTER_PLATE_FILE DRUG_MEDIA_FILE + export SCANS_DIR PROJECT_DATE EASY_DIR PROJECT_USER EASY_NAME EASY_SUFFIX EASY_RESULTS_DIR MASTER_PLATE_FILE DRUG_MEDIA_FILE echo "Hit enter to use the default EASY results directory $SCANS_DIR/Results_${PROJECT_PREFIX}_($PROJECT_SUFFIX)" (( YES )) || read -r -p "Or enter a custom suffix: " suffix EASY_RESULTS_DIR="$SCANS_DIR/Results_${PROJECT_PREFIX}_${suffix:-$PROJECT_SUFFIX}" @@ -549,7 +553,8 @@ easy() { fi done EASY_SUFFIX=${EASY_SUFFIX:-$suffix:-$PROJECT_SUFFIX} - EASY_RESULTS_DIR="$SCANS_DIR/Results_${PROJECT_PREFIX}_${EASY_SUFFIX}" + EASY_NAME="${PROJECT_PREFIX}_${EASY_SUFFIX}" + EASY_RESULTS_DIR="$SCANS_DIR/Results_$EASY_NAME" [[ -d $SCANS_DIR/MasterPlateFiles ]] || mkdir -p "$SCANS_DIR/MasterPlateFiles" DRUG_MEDIA_FILE="$SCANS_DIR/MasterPlateFiles/DrugMedia_$PROJECT.xls" MASTER_PLATE_FILE="$SCANS_DIR/MasterPlateFiles/MasterPlate_$PROJECT.xls" @@ -1403,7 +1408,7 @@ main() { # Templates QHTCP_TEMPLATE_DIR="$SCRIPT_DIR/templates/qhtcp" 
STUDY_TEMPLATE_DIR="$QHTCP_TEMPLATE_DIR/ExpTemplate" - EASY_DIR="$SCRIPT_DIR/templates/easy" + EASY_DIR="$SCRIPT_DIR/apps/easy" IMAGES="${IMAGES:-"/mnt/data/ExpJobs"}" DATE="$(date +%Y%m%d)" # change general date format here @@ -1468,8 +1473,8 @@ main() { SCANS_DIR="$IMAGES/$PROJECT" PROJECT_DATE="${PROJECT%"${PROJECT#????????}"}" # e.g. 20240723 PROJECT_SUFFIX="${PROJECT#????????_*_}" - PROJECT_USER="${PROJECT#????????_}" - PROJECT_USER="${PROJECT_USER%%_*}" + PROJECT_USER="${PROJECT#????????_}"; PROJECT_USER="${PROJECT_USER%%_*}" + PROJECT_NAME="${PROJECT_DATE}_${PROJECT_USER}_${PROJECT_SUFFIX}" # Run selected modules for m in "${MODULES[@]}"; do ask "Run $m" && "$m"