diff --git a/workflow/apps/r/CompileGTF.R b/workflow/apps/r/CompileGTF.R index 59a952c5..afb9a32f 100644 --- a/workflow/apps/r/CompileGTF.R +++ b/workflow/apps/r/CompileGTF.R @@ -52,4 +52,3 @@ output_file <- file.path(outDir, "GTFCombined.xlsx") # Call the function to combine the files into a workbook with named sheets combineFilesToWorkbook(file_list, output_file) - diff --git a/workflow/apps/r/PairwiseLK.R b/workflow/apps/r/PairwiseLK.R index 61a9b8c5..93ecf5ef 100644 --- a/workflow/apps/r/PairwiseLK.R +++ b/workflow/apps/r/PairwiseLK.R @@ -1,5 +1,5 @@ #!/usr/bin/env Rscript -# This R script performs GTA L and K Pairwise Compares for user specified pairs of Experiments +# This R script performs GTA L and K Pairwise Compares for user specified pairs of Experiments # # Updated 240724 Bryan C Roessler to improve file operations and portability # NOTE: The two required arguments are the same and now there are two optional arguments @@ -20,639 +20,916 @@ exp_name <- args[1] exp_name2 <- args[2] if (length(args) >= 3) { - study_info_file <- args[3] + study_info_file <- args[3] } else { - study_info_file <- "StudyInfo.csv" + study_info_file <- "StudyInfo.csv" } -if (length(args) > 4) { +if (length(args) >= 4) { output_dir <- args[4] } else { output_dir <- "gta" } -# if (length(args) > 3) { -# sgd_terms_file <- args[3] -# } else { -# sgd_terms_file <- "../Code/go_terms.tab" -# } - -# if (length(args) > 4) { -# sgd_features_file <- args[4] -# } else { -# sgd_features_file <- "../Code/gene_association.sgd" -# } - -expNumber1<- as.numeric(sub("^.*?(\\d+)$", "\\1", exp_name)) -expNumber2<- as.numeric(sub("^.*?(\\d+)$", "\\1", exp_name2)) -Labels<- read.csv(file=study_info_file,stringsAsFactors = FALSE) -Name1 <- Labels[expNumber1,2] -Name2 <- Labels[expNumber2,2] - +expNumber1 <- as.numeric(sub("^.*?(\\d+)$", "\\1", exp_name)) +expNumber2 <- as.numeric(sub("^.*?(\\d+)$", "\\1", exp_name2)) +Labels <- read.csv(file = study_info_file, stringsAsFactors = FALSE) +Name1 <- Labels[expNumber1, 2] +Name2 <- Labels[expNumber2, 2] go_terms_file <- "Average_GOTerms_All.csv" -input_file1 <- file.path(output_dir,exp_name,go_terms_file) -input_file2 <- file.path(output_dir,exp_name2,go_terms_file) +input_file1 <- file.path(output_dir, exp_name, go_terms_file) +input_file2 <- file.path(output_dir, exp_name2, go_terms_file) +pairDirL <- file.path(output_dir, paste("PairwiseCompareL_", exp_name, "-", exp_name2, sep = "")) +pairDirK <- file.path(output_dir, paste("PairwiseCompareK_", exp_name, "-", exp_name2, sep = "")) -pairDirL= file.path(output_dir,"PairwiseCompareL_",exp_name,"-",exp_name2) -pairDirK= file.path(output_dir,"PairwiseCompareK_",exp_name,"-",exp_name2) -outPathGTAcompare= file.path(output_dir,"PairwiseCompareL") -outPathGTAcompare[2]= file.path(output_dir,"PairwiseCompareK") -#dir.create(outPathGTAcompare[1]) -dir.create(pairDirL) #(outPathGTAcompare[1]) -dir.create(pairDirK) #(outPathGTAcompare[2]) +# Pairwise L +# outputPlotly <- "../GTAresults/PairwiseCompareL/" #"/GTAresults/PairwiseCompareL/" -###########BEGIN PAIRWISE L-----LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL -outputpath <- pairDirL #outPathGTAcompare[1] #Args[5] -#outputPlotly <- "../GTAresults/PairwiseCompareL/" #"/GTAresults/PairwiseCompareL/" -print("39") -#theme elements for plots -theme_Publication <- function(base_size=14, base_family="sans") { - (theme_foundation(base_size=base_size, base_family=base_family) - + theme(plot.title = element_text(face = "bold", - size = rel(1.2), hjust = 0.5), - text = element_text(), - panel.background = element_rect(colour = NA), - plot.background = element_rect(colour = NA), - panel.border = element_rect(colour = NA), - axis.title = element_text(face = "bold",size = rel(1)), - axis.title.y = element_text(angle=90,vjust =2), - axis.title.x = element_text(vjust = -0.2), - axis.text = element_text(), - axis.line = element_line(colour="black"), - axis.ticks = element_line(), - panel.grid.major = element_line(colour="#f0f0f0"), - panel.grid.minor = element_blank(), - legend.key = element_rect(colour = NA), - legend.position = "bottom", - legend.direction = "horizontal", - legend.key.size= unit(0.2, "cm"), - legend.spacing = unit(0, "cm"), - legend.title = element_text(face="italic"), - plot.margin=unit(c(10,5,5,5),"mm"), - strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"), - strip.text = element_text(face="bold") - )) - +# Theme elements for plots +theme_Publication <- function(base_size = 14, base_family = "sans") { + (theme_foundation(base_size = base_size, base_family = base_family) + + theme( + plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5), + text = element_text(), + panel.background = element_rect(colour = NA), + plot.background = element_rect(colour = NA), + panel.border = element_rect(colour = NA), + axis.title = element_text(face = "bold", size = rel(1)), + axis.title.y = element_text(angle = 90, vjust = 2), + axis.title.x = element_text(vjust = -0.2), + axis.text = element_text(), + axis.line = element_line(colour = "black"), + axis.ticks = element_line(), + panel.grid.major = element_line(colour = "#f0f0f0"), + panel.grid.minor = element_blank(), + legend.key = element_rect(colour = NA), + legend.position = "bottom", + legend.direction = "horizontal", + legend.key.size = unit(0.2, "cm"), + legend.spacing = unit(0, "cm"), + legend.title = element_text(face = "italic"), + plot.margin = unit(c(10, 5, 5, 5), "mm"), + strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"), + strip.text = element_text(face = "bold") + ) + ) } -scale_fill_Publication <- function(...){ +scale_fill_Publication <- function(...) { library(scales) - discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) + discrete_scale("fill", "Publication", manual_pal( + values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", + "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ... + ) } -scale_colour_Publication <- function(...){ - discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) +scale_colour_Publication <- function(...) { + discrete_scale("colour", "Publication", manual_pal( + values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", + "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ... + ) } -theme_Publication_legend_right <- function(base_size=14, base_family="sans") { - (theme_foundation(base_size=base_size, base_family=base_family) - + theme(plot.title = element_text(face = "bold", - size = rel(1.2), hjust = 0.5), - text = element_text(), - panel.background = element_rect(colour = NA), - plot.background = element_rect(colour = NA), - panel.border = element_rect(colour = NA), - axis.title = element_text(face = "bold",size = rel(1)), - axis.title.y = element_text(angle=90,vjust =2), - axis.title.x = element_text(vjust = -0.2), - axis.text = element_text(), - axis.line = element_line(colour="black"), - axis.ticks = element_line(), - panel.grid.major = element_line(colour="#f0f0f0"), - panel.grid.minor = element_blank(), - legend.key = element_rect(colour = NA), - legend.position = "right", - legend.direction = "vertical", - legend.key.size= unit(0.5, "cm"), - legend.spacing = unit(0, "cm"), - legend.title = element_text(face="italic"), - plot.margin=unit(c(10,5,5,5),"mm"), - strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"), - strip.text = element_text(face="bold") - )) +theme_Publication_legend_right <- function(base_size = 14, base_family = "sans") { + (theme_foundation(base_size = base_size, base_family = base_family) + + theme( + plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5), + text = element_text(), + panel.background = element_rect(colour = NA), + plot.background = element_rect(colour = NA), + panel.border = element_rect(colour = NA), + axis.title = element_text(face = "bold", size = rel(1)), + axis.title.y = element_text(angle = 90, vjust = 2), + axis.title.x = element_text(vjust = -0.2), + axis.text = element_text(), + axis.line = element_line(colour = "black"), + axis.ticks = element_line(), + panel.grid.major = element_line(colour = "#f0f0f0"), + panel.grid.minor = element_blank(), + legend.key = element_rect(colour = NA), + legend.position = "right", + legend.direction = "vertical", + legend.key.size = unit(0.5, "cm"), + legend.spacing = unit(0, "cm"), + legend.title = element_text(face = "italic"), + plot.margin = unit(c(10, 5, 5, 5), "mm"), + strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"), + strip.text = element_text(face = "bold") + ) + ) } -scale_fill_Publication <- function(...){ - discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) +scale_fill_Publication <- function(...) { + discrete_scale("fill", "Publication", manual_pal( + values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", + "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...) } -scale_colour_Publication <- function(...){ - discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) +scale_colour_Publication <- function(...) { + discrete_scale("colour", "Publication", manual_pal( + values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", + "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...) } -X1 <- read.csv(file = input_file1,stringsAsFactors=FALSE,header = TRUE) -X2 <- read.csv(file = input_file2,stringsAsFactors=FALSE,header = TRUE) -print(117) +X1 <- read.csv(file = input_file1, stringsAsFactors = FALSE, header = TRUE) +X2 <- read.csv(file = input_file2, stringsAsFactors = FALSE, header = TRUE) -X <- merge(X1,X2,by ="Term_Avg",all=TRUE,suffixes = c("_X1","_X2")) -gg <- ggplot(data = X,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Ontology_Avg_X1,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) + - xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) + - ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep = "")) + +X <- merge(X1, X2, by = "Term_Avg", all = TRUE, suffixes = c("_X1", "_X2")) +gg <- ggplot(data = X, aes( + x = Z_lm_L_Avg_X1, + y = Z_lm_L_Avg_X2, + color = Ontology_Avg_X1, + Term = Term_Avg, + Genes = Genes_Avg_X1, + NumGenes = NumGenes_Avg_X1, + AllPossibleGenes = AllPossibleGenes_Avg_X1, + SD_1 = Z_lm_L_SD_X1, + SD_2 = Z_lm_L_SD_X2 +)) + + xlab(paste("GO Term Avg lm Z for ", Name1, sep = "")) + + geom_rect( + aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), + color = "grey20", + size = 0.25, + alpha = 0.1, + inherit.aes = FALSE, + fill = NA + ) + + geom_point(shape = 3) + + ylab(paste("GO Term Avg lm Z for ", Name2, sep = "")) + + ggtitle(paste("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2, sep = "")) + theme_Publication_legend_right() -pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOntology.pdf",sep=""),width = 12,height = 8) + +pdf( + file.path(pairDirL, paste("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOntology.pdf", sep = "")), + width = 12, + height = 8 +) + gg dev.off() pgg <- ggplotly(gg) -#pgg -print("130") -fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOntology.html",sep="") -print(fname) -htmlwidgets::saveWidget(pgg, file.path(getwd(),fname)) -file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname)) +fname <- file.path(pairDirL, paste("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOntology.html", sep = "")) +htmlwidgets::saveWidget(pgg, fname) - -#ID aggravators and alleviators, regardless of whether they meet 2SD threshold -X1_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 < 2),] -X1_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 > -2),] - -X2_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 < 2),] -X2_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 > -2),] - -Overlap_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 >= 2),] -Overlap_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 <= -2),] - -X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 <= -2),] -X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 >= 2),] -print("155") +# ID aggravators and alleviators, regardless of whether they meet 2SD threshold +X1_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 < 2), ] +X1_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 > -2), ] +X2_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 < 2), ] +X2_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 > -2), ] +Overlap_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 >= 2), ] +Overlap_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 <= -2), ] +X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 <= -2), ] +X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 >= 2), ] X$Overlap_Avg <- NA -try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name1,"Specific_Deletion_Enhancers",sep="_")) -try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg,]$Overlap_Avg <- paste(Name1,"Specific_Deletion_Suppresors",sep="_")) -try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Specific_Deletion_Enhancers",sep="_")) -try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Specific_Deletion_Suppressors",sep="_")) -try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg,]$Overlap_Avg <- "Overlapping_Deletion_Enhancers") -try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg,]$Overlap_Avg <- "Overlapping_Deletion_Suppressors") -try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg,]$Overlap_Avg <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_")) -try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_")) +try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <- + paste(Name1, "Specific_Deletion_Enhancers", sep = "_")) +try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <- + paste(Name1, "Specific_Deletion_Suppresors", sep = "_")) +try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <- + paste(Name2, "Specific_Deletion_Enhancers", sep = "_")) +try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <- + paste(Name2, "Specific_Deletion_Suppressors", sep = "_")) +try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg, ]$Overlap_Avg <- "Overlapping_Deletion_Enhancers") +try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg, ]$Overlap_Avg <- "Overlapping_Deletion_Suppressors") +try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg, ]$Overlap_Avg <- + paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_")) +try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg, ]$Overlap_Avg <- + paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_")) - -gg <- ggplot(data = X,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) + - xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) + - ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) + +gg <- ggplot(data = X, aes( + x = Z_lm_L_Avg_X1, + y = Z_lm_L_Avg_X2, + color = Overlap_Avg, + Term = Term_Avg, + Genes = Genes_Avg_X1, + NumGenes = NumGenes_Avg_X1, + AllPossibleGenes = AllPossibleGenes_Avg_X1, + SD_1 = Z_lm_L_SD_X1, + SD_2 = Z_lm_L_SD_X2 +)) + + xlab(paste("GO Term Avg lm Z for ", Name1, sep = "")) + + geom_rect( + aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), + color = "grey20", + size = 0.25, + alpha = 0.1, + inherit.aes = FALSE, + fill = NA + ) + + geom_point(shape = 3) + + ylab(paste("GO Term Avg lm Z for ", Name2, sep = "")) + + ggtitle(paste("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2, sep = "")) + theme_Publication_legend_right() -pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap.pdf",sep=""),width = 12,height = 8) + +pdf( + file.path(pairDirL, paste("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap.pdf", sep = "")), + width = 12, + height = 8 +) + gg dev.off() pgg <- ggplotly(gg) +fname <- file.path(pairDirL, paste("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOverlap.html", sep = "")) +htmlwidgets::saveWidget(pgg, fname) -#pgg -print("#2-174") -#2 -fname <- paste("/Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOverlap.html",sep="") -print(fname) -htmlwidgets::saveWidget(pgg, file.path(getwd(),fname)) -file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname)) +x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2, ] -x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2,] #3 -gg <- ggplot(data = x_rem2_gene,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) + - xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) + - ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) + +gg <- ggplot(data = x_rem2_gene, aes( + x = Z_lm_L_Avg_X1, + y = Z_lm_L_Avg_X2, + color = Overlap_Avg, + Term = Term_Avg, + Genes = Genes_Avg_X1, + NumGenes = NumGenes_Avg_X1, + AllPossibleGenes = AllPossibleGenes_Avg_X1, + SD_1 = Z_lm_L_SD_X1, + SD_2 = Z_lm_L_SD_X2 +)) + + xlab(paste("GO Term Avg lm Z for ", Name1, sep = "")) + + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + + geom_point(shape = 3) + + ylab(paste("GO Term Avg lm Z for ", Name2, sep = "")) + + ggtitle(paste("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2, sep = "")) + theme_Publication_legend_right() -pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_above2genes.pdf",sep=""),width = 12,height = 8) + +pdf( + file.path(pairDirL, paste("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_above2genes.pdf", sep = "")), + width = 12, + height = 8 +) + gg dev.off() pgg <- ggplotly(gg) #pgg -print("#3") -fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOverlap_above2genes.html",sep="") -print(fname) -htmlwidgets::saveWidget(pgg, file.path(getwd(),fname)) -file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname)) +fname <- file.path(pairDirL, paste("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOverlap_above2genes.html", sep = "")) +htmlwidgets::saveWidget(pgg, fname) #4 -X_overlap_nothresold <- X[!(is.na(X$Overlap_Avg)),] -gg <- ggplot(data = X_overlap_nothresold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) + - xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) + - ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep = "")) + +X_overlap_nothresold <- X[!(is.na(X$Overlap_Avg)), ] +gg <- ggplot(data = X_overlap_nothresold, aes( + x = Z_lm_L_Avg_X1, + y = Z_lm_L_Avg_X2, + color = Overlap_Avg, + Term = Term_Avg, + Genes = Genes_Avg_X1, + NumGenes = NumGenes_Avg_X1, + AllPossibleGenes = AllPossibleGenes_Avg_X1, + SD_1 = Z_lm_L_SD_X1, + SD_2 = Z_lm_L_SD_X2 +)) + + xlab(paste("GO Term Avg lm Z for ", Name1, sep = "")) + + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + + geom_point(shape = 3) + + ylab(paste("GO Term Avg lm Z for ", Name2, sep = "")) + + ggtitle(paste("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2, sep = "")) + theme_Publication_legend_right() -pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_Above2SD_ByOverlap.pdf",sep=""),width = 12,height = 8) + +pdf( + file.path(pairDirL, paste("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_Above2SD_ByOverlap.pdf", sep = "")), + width = 12, + height = 8 +) + gg dev.off() pgg <- ggplotly(gg) #pgg -print("#4") -fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_Above2SD_ByOverlap.html",sep="") -print(fname) -htmlwidgets::saveWidget(pgg, file.path(getwd(),fname)) -file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname)) +fname <- file.path(pairDirL, paste("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_Above2SD_ByOverlap.html", sep = "")) +htmlwidgets::saveWidget(pgg, fname) - -#only output GTA terms where average score is still above 2 after subtracting the SD -#Z1 will ID aggravators, Z2 alleviators +# Only output GTA terms where average score is still above 2 after subtracting the SD +# Z1 will ID aggravators, Z2 alleviators Z1 <- X Z1$L_Subtract_SD_X1 <- Z1$Z_lm_L_Avg_X1 - Z1$Z_lm_L_SD_X1 Z1$L_Subtract_SD_X2 <- Z1$Z_lm_L_Avg_X2 - Z1$Z_lm_L_SD_X2 - Z2 <- X Z2$L_Subtract_SD_X1 <- Z1$Z_lm_L_Avg_X1 + Z1$Z_lm_L_SD_X1 Z2$L_Subtract_SD_X2 <- Z1$Z_lm_L_Avg_X2 + Z1$Z_lm_L_SD_X2 - -X1_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 < 2),] -X1_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X1 <= -2 & Z2$L_Subtract_SD_X2 > -2),] - -X2_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z1$L_Subtract_SD_X1 < 2),] -X2_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 > -2),] - -Overlap_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 >= 2),] -Overlap_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 <= -2),] - -X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z2$L_Subtract_SD_X1 <= -2),] -X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2),] - +X1_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 < 2), ] +X1_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X1 <= -2 & Z2$L_Subtract_SD_X2 > -2), ] +X2_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z1$L_Subtract_SD_X1 < 2), ] +X2_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 > -2), ] +Overlap_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 >= 2), ] +Overlap_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 <= -2), ] +X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z2$L_Subtract_SD_X1 <= -2), ] +X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2), ] X$Overlap <- NA -try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg,]$Overlap <- paste(Name1,"Specific_Deletion_Enhancers",sep="_")) -try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg,]$Overlap <- paste(Name1,"Specific_Deletion_Suppresors",sep="_")) -try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg,]$Overlap <- paste(Name2,"Specific_Deletion_Enhancers",sep="_")) -try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg,]$Overlap <- paste(Name2,"Specific_Deletion_Suppressors",sep="_")) -try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg,]$Overlap <- "Overlapping_Deletion_Enhancers") -try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg,]$Overlap <- "Overlapping_Deletion_Suppressors") -try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg,]$Overlap <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_")) -try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg,]$Overlap <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_")) +try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg, ]$Overlap <- paste(Name1, "Specific_Deletion_Enhancers", sep = "_")) +try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg, ]$Overlap <- paste(Name1, "Specific_Deletion_Suppresors", sep = "_")) +try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg, ]$Overlap <- paste(Name2, "Specific_Deletion_Enhancers", sep = "_")) +try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg, ]$Overlap <- paste(Name2, "Specific_Deletion_Suppressors", sep = "_")) +try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg, ]$Overlap <- "Overlapping_Deletion_Enhancers") +try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg, ]$Overlap <- "Overlapping_Deletion_Suppressors") +try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg, ]$Overlap <- + paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_")) +try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg, ]$Overlap <- + paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_")) #5 -X_abovethreshold <- X[!(is.na(X$Overlap)),] +X_abovethreshold <- X[!(is.na(X$Overlap)), ] -gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) + - xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) + - ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) + +gg <- ggplot(data = X_abovethreshold, aes( + x = Z_lm_L_Avg_X1, + y = Z_lm_L_Avg_X2, + color = Overlap, + Term = Term_Avg, + Genes = Genes_Avg_X1, + NumGenes = NumGenes_Avg_X1, + AllPossibleGenes = AllPossibleGenes_Avg_X1, + SD_1 = Z_lm_L_SD_X1, + SD_2 = Z_lm_L_SD_X2 +)) + + xlab(paste("GO Term Avg lm Z for ", Name1, sep = "")) + + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + + geom_point(shape = 3) + + ylab(paste("GO Term Avg lm Z for ", Name2, sep = "")) + + ggtitle(paste("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2, sep = "")) + theme_Publication_legend_right() -pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold.pdf",sep=""),width = 12,height = 8) + +pdf( + file.path(pairDirL, paste("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.pdf", sep = "")), + width = 12, + height = 8 +) + gg dev.off() pgg <- ggplotly(gg) #pgg -print("#5") -fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold.html",sep="") -print(fname) -htmlwidgets::saveWidget(pgg, file.path(getwd(),fname)) -file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname)) +fname <- file.path(pairDirL, paste("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.html", sep = "")) +htmlwidgets::saveWidget(pgg, fname) + #6 -gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) + - xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) + - geom_text(aes(label=Term_Avg),nudge_y = 0.25,size=2) + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) + - ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) + +gg <- ggplot(data = X_abovethreshold, aes( + x = Z_lm_L_Avg_X1, + y = Z_lm_L_Avg_X2, + color = Overlap, + Term = Term_Avg, + Genes = Genes_Avg_X1, + NumGenes = NumGenes_Avg_X1, + AllPossibleGenes = AllPossibleGenes_Avg_X1, + SD_1 = Z_lm_L_SD_X1, + SD_2 = Z_lm_L_SD_X2 +)) + + xlab(paste("GO Term Avg lm Z for ", Name1, sep = "")) + + geom_text(aes(label = Term_Avg), nudge_y = 0.25, size = 2) + + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + + geom_point(shape = 3, size = 3) + + ylab(paste("GO Term Avg lm Z for ", Name2, sep = "")) + + ggtitle(paste("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2, sep = "")) + theme_Publication_legend_right() -pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.pdf",sep=""),width = 20,height = 20) + +pdf( + file.path(pairDirL, paste("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_names.pdf", sep = "")), + width = 20, + height = 20 +) + gg dev.off() pgg <- ggplotly(gg) #pgg -print("#6") -fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.html",sep="") -print(fname) -htmlwidgets::saveWidget(pgg, file.path(getwd(),fname)) -file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname)) +fname <- file.path(pairDirL, paste("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_names.html", sep = "")) +htmlwidgets::saveWidget(pgg, fname) X_abovethreshold$X1_Rank <- NA -X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X1,ties.method = "random") +X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X1, ties.method = "random") X_abovethreshold$X2_Rank <- NA -X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X2,ties.method = "random") +X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X2, ties.method = "random") #7 -gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) + - xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) + - geom_text(aes(label=X1_Rank),nudge_y = 0.25,size=4) + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) + - ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) + +gg <- ggplot(data = X_abovethreshold, aes( + x = Z_lm_L_Avg_X1, + y = Z_lm_L_Avg_X2, + color = Overlap, + Term = Term_Avg, + Genes = Genes_Avg_X1, + NumGenes = NumGenes_Avg_X1, + AllPossibleGenes = AllPossibleGenes_Avg_X1, + SD_1 = Z_lm_L_SD_X1, + SD_2 = Z_lm_L_SD_X2 +)) + + xlab(paste("GO Term Avg lm Z for ", Name1, sep = "")) + + geom_text(aes(label = X1_Rank), nudge_y = 0.25, size = 4) + + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + + geom_point(shape = 3, size = 3) + + ylab(paste("GO Term Avg lm Z for ", Name2, sep = "")) + + ggtitle(paste("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2, sep = "")) + theme_Publication_legend_right() -pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.pdf",sep=""),width = 15,height = 15) + +pdf( + file.path(pairDirL, paste("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX1.pdf", sep = "")), + width = 15, + height = 15 +) + gg dev.off() pgg <- ggplotly(gg) #pgg -print("#7") -fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.html",sep="") -print(fname) -htmlwidgets::saveWidget(pgg, file.path(getwd(),fname)) -file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname)) + +fname <- + file.path(pairDirL, paste("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX1.html", sep = "")) +htmlwidgets::saveWidget(pgg, fname) #8 -gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) + - xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) + - geom_text(aes(label=X2_Rank),nudge_y = 0.25,size=4) + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) + - ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) + +gg <- ggplot(data = X_abovethreshold, aes( + x = Z_lm_L_Avg_X1, + y = Z_lm_L_Avg_X2, + color = Overlap, + Term = Term_Avg, + Genes = Genes_Avg_X1, + NumGenes = NumGenes_Avg_X1, + AllPossibleGenes = AllPossibleGenes_Avg_X1, + SD_1 = Z_lm_L_SD_X1, + SD_2 = Z_lm_L_SD_X2 +)) + + xlab(paste("GO Term Avg lm Z for ", Name1, sep = "")) + + geom_text(aes(label = X2_Rank), nudge_y = 0.25, size = 4) + + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + + geom_point(shape = 3, size = 3) + + ylab(paste("GO Term Avg lm Z for ", Name2, sep = "")) + + ggtitle(paste("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2, sep = "")) + theme_Publication_legend_right() -pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.pdf",sep=""),width = 15,height = 15) + +pdf( + file.path(pairDirL, paste("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX2.pdf", sep = "")), + width = 15, + height = 15 +) + gg dev.off() pgg <- ggplotly(gg) #pgg -print("#8") -fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.html",sep="") -print(fname) -htmlwidgets::saveWidget(pgg, file.path(getwd(),fname)) -file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname)) +fname <- + file.path(pairDirL, paste("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX2.html", sep = "")) +htmlwidgets::saveWidget(pgg, fname) -print("write csv files L") -write.csv(x=X,file = paste(outputpath,"/All_GTA_Avg_Scores_",Name1,"_vs_",Name2,".csv",sep=""),row.names = FALSE) -write.csv(x=X_abovethreshold,file = paste(getwd(),"/",outputpath,"/","AboveThreshold_GTA_Avg_Scores_",Name1,"_vs_",Name2,".csv",sep=""),row.names = FALSE) -#End of L GTA Pairwise Compare +write.csv( + x = X, + file.path(pairDirL, paste("All_GTA_Avg_Scores_", Name1, "_vs_", Name2, ".csv", sep = "")), + row.names = FALSE +) +write.csv( + x = X_abovethreshold, + file = file.path(pairDirL, paste("AboveThreshold_GTA_Avg_Scores_", Name1, "_vs_", Name2, ".csv", sep = "")), + row.names = FALSE +) - -###########BEGIN PAIRWISE K-----KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK -#define the output path (as fourth argument from Rscript) -outputpath <- pairDirK #outPathGTAcompare[2] #Args[5] - -#theme elements for plots -theme_Publication <- function(base_size=14, base_family="sans") { - (theme_foundation(base_size=base_size, base_family=base_family) - + theme(plot.title = element_text(face = "bold", - size = rel(1.2), hjust = 0.5), - text = element_text(), - panel.background = element_rect(colour = NA), - plot.background = element_rect(colour = NA), - panel.border = element_rect(colour = NA), - axis.title = element_text(face = "bold",size = rel(1)), - axis.title.y = element_text(angle=90,vjust =2), - axis.title.x = element_text(vjust = -0.2), - axis.text = element_text(), - axis.line = element_line(colour="black"), - axis.ticks = element_line(), - panel.grid.major = element_line(colour="#f0f0f0"), - panel.grid.minor = element_blank(), - legend.key = element_rect(colour = NA), - legend.position = "bottom", - legend.direction = "horizontal", - legend.key.size= unit(0.2, "cm"), - legend.spacing = unit(0, "cm"), - legend.title = element_text(face="italic"), - plot.margin=unit(c(10,5,5,5),"mm"), - strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"), - strip.text = element_text(face="bold") - )) +# Begin Pairwsise K +# Theme elements for plots +theme_Publication <- function(base_size = 14, base_family = "sans") { + (theme_foundation(base_size = base_size, base_family = base_family) + + theme( + plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5), + text = element_text(), + panel.background = element_rect(colour = NA), + plot.background = element_rect(colour = NA), + panel.border = element_rect(colour = NA), + axis.title = element_text(face = "bold", size = rel(1)), + axis.title.y = element_text(angle = 90, vjust = 2), + axis.title.x = element_text(vjust = -0.2), + axis.text = element_text(), + axis.line = element_line(colour = "black"), + axis.ticks = element_line(), + panel.grid.major = element_line(colour = "#f0f0f0"), + panel.grid.minor = element_blank(), + legend.key = element_rect(colour = NA), + legend.position = "bottom", + legend.direction = "horizontal", + legend.key.size = unit(0.2, "cm"), + legend.spacing = unit(0, "cm"), + legend.title = element_text(face = "italic"), + plot.margin = unit(c(10, 5, 5, 5), "mm"), + strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"), + strip.text = element_text(face = "bold") + ) + ) } -scale_fill_Publication <- function(...){ - library(scales) - discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) -} -scale_colour_Publication <- function(...){ - discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) +scale_fill_Publication <- function(...) { + library("scales") + discrete_scale( + "fill", + "Publication", + manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), + ... + ) } -theme_Publication_legend_right <- function(base_size=14, base_family="sans") { - (theme_foundation(base_size=base_size, base_family=base_family) - + theme(plot.title = element_text(face = "bold", - size = rel(1.2), hjust = 0.5), - text = element_text(), - panel.background = element_rect(colour = NA), - plot.background = element_rect(colour = NA), - panel.border = element_rect(colour = NA), - axis.title = element_text(face = "bold",size = rel(1)), - axis.title.y = element_text(angle=90,vjust =2), - axis.title.x = element_text(vjust = -0.2), - axis.text = element_text(), - axis.line = element_line(colour="black"), - axis.ticks = element_line(), - panel.grid.major = element_line(colour="#f0f0f0"), - panel.grid.minor = element_blank(), - legend.key = element_rect(colour = NA), - legend.position = "right", - legend.direction = "vertical", - legend.key.size= unit(0.5, "cm"), - legend.spacing = unit(0, "cm"), - legend.title = element_text(face="italic"), - plot.margin=unit(c(10,5,5,5),"mm"), - strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"), - strip.text = element_text(face="bold") - )) +scale_colour_Publication <- function(...) { + discrete_scale( + "colour", + "Publication", + manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), + ... + ) } -scale_fill_Publication <- function(...){ - discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) -} -scale_colour_Publication <- function(...){ - discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) +theme_Publication_legend_right <- function(base_size = 14, base_family = "sans") { + (theme_foundation(base_size = base_size, base_family = base_family) + + theme( + plot.title = element_text(face = "bold", + size = rel(1.2), hjust = 0.5), + text = element_text(), + panel.background = element_rect(colour = NA), + plot.background = element_rect(colour = NA), + panel.border = element_rect(colour = NA), + axis.title = element_text(face = "bold", size = rel(1)), + axis.title.y = element_text(angle = 90, vjust = 2), + axis.title.x = element_text(vjust = -0.2), + axis.text = element_text(), + axis.line = element_line(colour = "black"), + axis.ticks = element_line(), + panel.grid.major = element_line(colour = "#f0f0f0"), + panel.grid.minor = element_blank(), + legend.key = element_rect(colour = NA), + legend.position = "right", + legend.direction = "vertical", + legend.key.size = unit(0.5, "cm"), + legend.spacing = unit(0, "cm"), + legend.title = element_text(face = "italic"), + plot.margin = unit(c(10, 5, 5, 5), "mm"), + strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"), + strip.text = element_text(face = "bold") + ) + ) } -X1 <- read.csv(file = input_file1,stringsAsFactors=FALSE,header = TRUE) +scale_fill_Publication <- function(...) { + discrete_scale( + "fill", + "Publication", + manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), + ... + ) +} -X2 <- read.csv(file = input_file2,stringsAsFactors=FALSE,header = TRUE) +scale_colour_Publication <- function(...) { + discrete_scale( + "colour", + "Publication", + manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), + ... + ) +} + +X1 <- read.csv(file = input_file1, stringsAsFactors = FALSE, header = TRUE) +X2 <- read.csv(file = input_file2, stringsAsFactors = FALSE, header = TRUE) #1 -X <- merge(X1,X2,by ="Term_Avg",all=TRUE,suffixes = c("_X1","_X2")) +X <- merge(X1, X2, by = "Term_Avg", all = TRUE, suffixes = c("_X1", "_X2")) -gg <- ggplot(data = X,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Ontology_Avg_X1,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) + - xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) + - ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep = "")) + +gg <- ggplot(data = X, aes( + x = Z_lm_K_Avg_X1, + y = Z_lm_K_Avg_X2, + color = Ontology_Avg_X1, + Term = Term_Avg, + Genes = Genes_Avg_X1, + NumGenes = NumGenes_Avg_X1, + AllPossibleGenes = AllPossibleGenes_Avg_X1, + SD_1 = Z_lm_K_SD_X1, + SD_2 = Z_lm_K_SD_X2 +)) + + xlab(paste("GO Term Avg lm Z for ", Name1, sep = "")) + + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + + geom_point(shape = 3) + + ylab(paste("GO Term Avg lm Z for ", Name2, sep = "")) + + ggtitle(paste("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2, sep = "")) + theme_Publication_legend_right() -pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOntology.pdf",sep=""),width = 12,height = 8) + +pdf( + file.path(pairDirK, paste("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOntology.pdf", sep = "")), + width = 12, + height = 8 +) + gg dev.off() pgg <- ggplotly(gg) #pgg -print("127") -fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOntology.html",sep="") -print(fname) -htmlwidgets::saveWidget(pgg, file.path(getwd(),fname)) -file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname)) +fname <- file.path(pairDirK, paste("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOntology.html", sep = "")) +htmlwidgets::saveWidget(pgg, fname) #2 -#ID aggravators and alleviators, regardless of whether they meet 2SD threshold -X1_Specific_Aggravators <- X[which(X$Z_lm_K_Avg_X1 >= 2 & X$Z_lm_K_Avg_X2 < 2),] -X1_Specific_Alleviators <- X[which(X$Z_lm_K_Avg_X1 <= -2 & X$Z_lm_K_Avg_X2 > -2),] - -X2_Specific_Aggravators <- X[which(X$Z_lm_K_Avg_X2 >= 2 & X$Z_lm_K_Avg_X1 < 2),] -X2_Specific_Alleviators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm_K_Avg_X1 > -2),] - -Overlap_Aggravators <- X[which(X$Z_lm_K_Avg_X1 >= 2 & X$Z_lm_K_Avg_X2 >= 2),] -Overlap_Alleviators <- X[which(X$Z_lm_K_Avg_X1 <= -2 & X$Z_lm_K_Avg_X2 <= -2),] - -X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_K_Avg_X2 >= 2 & X$Z_lm_K_Avg_X1 <= -2),] -X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm_K_Avg_X1 >= 2),] - +# ID aggravators and alleviators, regardless of whether they meet 2SD threshold +X1_Specific_Aggravators <- X[which(X$Z_lm_K_Avg_X1 >= 2 & X$Z_lm_K_Avg_X2 < 2), ] +X1_Specific_Alleviators <- X[which(X$Z_lm_K_Avg_X1 <= -2 & X$Z_lm_K_Avg_X2 > -2), ] +X2_Specific_Aggravators <- X[which(X$Z_lm_K_Avg_X2 >= 2 & X$Z_lm_K_Avg_X1 < 2), ] +X2_Specific_Alleviators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm_K_Avg_X1 > -2), ] +Overlap_Aggravators <- X[which(X$Z_lm_K_Avg_X1 >= 2 & X$Z_lm_K_Avg_X2 >= 2), ] +Overlap_Alleviators <- X[which(X$Z_lm_K_Avg_X1 <= -2 & X$Z_lm_K_Avg_X2 <= -2), ] +X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_K_Avg_X2 >= 2 & X$Z_lm_K_Avg_X1 <= -2), ] +X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm_K_Avg_X1 >= 2), ] X$Overlap_Avg <- NA -try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name1,"Specific_Deletion_Suppressors",sep="_")) -try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg,]$Overlap_Avg <- paste(Name1,"Specific_Deletion_Enhancers",sep="_")) -try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Specific_Deletion_Suppressors",sep="_")) -try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Specific_Deletion_Enhancers",sep="_")) -try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg,]$Overlap_Avg <- "Overlapping_Deletion_Suppressors") -try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg,]$Overlap_Avg <- "Overlapping_Deletion_Enhancers") -try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg,]$Overlap_Avg <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_")) -try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_")) +try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <- + paste(Name1, "Specific_Deletion_Suppressors", sep = "_")) +try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <- + paste(Name1, "Specific_Deletion_Enhancers", sep = "_")) +try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <- + paste(Name2, "Specific_Deletion_Suppressors", sep = "_")) +try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <- + paste(Name2, "Specific_Deletion_Enhancers", sep = "_")) +try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg, ]$Overlap_Avg <- + "Overlapping_Deletion_Suppressors") +try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg, ]$Overlap_Avg <- + "Overlapping_Deletion_Enhancers") +try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg, ]$Overlap_Avg <- + paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_")) +try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg, ]$Overlap_Avg <- + paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_")) - -plotly_path <- paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_byOverlap.html",sep="") -gg <- ggplot(data = X,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) + - xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) + - ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) + +plotly_path <- file.path(pairDirK, paste("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_byOverlap.html", sep = "")) +gg <- ggplot(data = X, aes( + x = Z_lm_K_Avg_X1, + y = Z_lm_K_Avg_X2, + color = Overlap_Avg, + Term = Term_Avg, + Genes = Genes_Avg_X1, + NumGenes = NumGenes_Avg_X1, + AllPossibleGenes = AllPossibleGenes_Avg_X1, + SD_1 = Z_lm_K_SD_X1, + SD_2 = Z_lm_K_SD_X2 +)) + + xlab(paste("GO Term Avg lm Z for ", Name1, sep = "")) + + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + + geom_point(shape = 3) + + ylab(paste("GO Term Avg lm Z for ", Name2, sep = "")) + + ggtitle(paste("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2, sep = "")) + theme_Publication_legend_right() -pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap.pdf",sep=""),width = 12,height = 8) + +pdf( + file.path(pairDirK, paste("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap.pdf", sep = "")), + width = 12, + height = 8 +) + gg dev.off() pgg <- ggplotly(gg) -#pgg -print("#2-170") + #2 -fname <- paste("/Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOverlap.html",sep="") -print(fname) -htmlwidgets::saveWidget(pgg, file.path(getwd(),fname)) -file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname)) +fname <- file.path(pairDirK, paste("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOverlap.html", sep = "")) +htmlwidgets::saveWidget(pgg, fname) #3 -x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2,] -plotly_path <- paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_byOverlap_above2genes.html",sep="") -gg <- ggplot(data = x_rem2_gene,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) + - xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) + - ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) + +x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2, ] +plotly_path <- file.path(pairDirK, paste("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_byOverlap_above2genes.html", sep = "")) +gg <- ggplot(data = x_rem2_gene, aes( + x = Z_lm_K_Avg_X1, + y = Z_lm_K_Avg_X2, + color = Overlap_Avg, + Term = Term_Avg, + Genes = Genes_Avg_X1, + NumGenes = NumGenes_Avg_X1, + AllPossibleGenes = AllPossibleGenes_Avg_X1, + SD_1 = Z_lm_K_SD_X1, + SD_2 = Z_lm_K_SD_X2 +)) + + xlab(paste("GO Term Avg lm Z for ", Name1, sep = "")) + + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + + geom_point(shape = 3) + + ylab(paste("GO Term Avg lm Z for ", Name2, sep = "")) + + ggtitle(paste("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2, sep = "")) + theme_Publication_legend_right() -pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_above2genes.pdf",sep=""),width = 12,height = 8) + +pdf( + file.path(pairDirK, paste("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_above2genes.pdf", sep = "")), + width = 12, + height = 8 +) + gg dev.off() pgg <- ggplotly(gg) #pgg -print("#3") -fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOverlap_above2genes.html",sep="") -print(fname) -htmlwidgets::saveWidget(pgg, file.path(getwd(),fname)) -file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname)) +fname <- file.path(pairDirK, paste("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOverlap_above2genes.html", sep = "")) +htmlwidgets::saveWidget(pgg, fname) #4 -X_overlap_nothresold <- X[!(is.na(X$Overlap_Avg)),] -gg <- ggplot(data = X_overlap_nothresold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) + - xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) + - ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep = "")) + +X_overlap_nothresold <- X[!(is.na(X$Overlap_Avg)), ] +gg <- ggplot(data = X_overlap_nothresold, aes( + x = Z_lm_K_Avg_X1, + y = Z_lm_K_Avg_X2, + color = Overlap_Avg, + Term = Term_Avg, + Genes = Genes_Avg_X1, + NumGenes = NumGenes_Avg_X1, + AllPossibleGenes = AllPossibleGenes_Avg_X1, + SD_1 = Z_lm_K_SD_X1, + SD_2 = Z_lm_K_SD_X2 +)) + + xlab(paste("GO Term Avg lm Z for ", Name1, sep = "")) + + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + + geom_point(shape = 3) + + ylab(paste("GO Term Avg lm Z for ", Name2, sep = "")) + + ggtitle(paste("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2, sep = "")) + theme_Publication_legend_right() -pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_Above2SD_ByOverlap.pdf",sep=""),width = 12,height = 8) + +pdf( + file.path(pairDirK, paste("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_Above2SD_ByOverlap.pdf", sep = "")), + width = 12, + height = 8 +) + gg dev.off() pgg <- ggplotly(gg) #pgg -print("#4") -fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_Above2SD_ByOverlap.html",sep="") -print(fname) -htmlwidgets::saveWidget(pgg, file.path(getwd(),fname)) -file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname)) + +fname <- file.path(pairDirK, paste("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_Above2SD_ByOverlap.html", sep = "")) +htmlwidgets::saveWidget(pgg, fname) #5 -#only output GTA terms where average score is still above 2 after subtracting the SD -#Z1 will ID aggravators, Z2 alleviators +# Only output GTA terms where average score is still above 2 after subtracting the SD +# Z1 will ID aggravators, Z2 alleviators Z1 <- X Z1$L_Subtract_SD_X1 <- Z1$Z_lm_K_Avg_X1 - Z1$Z_lm_K_SD_X1 Z1$L_Subtract_SD_X2 <- Z1$Z_lm_K_Avg_X2 - Z1$Z_lm_K_SD_X2 - Z2 <- X Z2$L_Subtract_SD_X1 <- Z1$Z_lm_K_Avg_X1 + Z1$Z_lm_K_SD_X1 Z2$L_Subtract_SD_X2 <- Z1$Z_lm_K_Avg_X2 + Z1$Z_lm_K_SD_X2 - - -X1_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 < 2),] -X1_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X1 <= -2 & Z2$L_Subtract_SD_X2 > -2),] - -X2_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z1$L_Subtract_SD_X1 < 2),] -X2_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 > -2),] - -Overlap_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 >= 2),] -Overlap_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 <= -2),] - -X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z2$L_Subtract_SD_X1 <= -2),] -X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2),] - +X1_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 < 2), ] +X1_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X1 <= -2 & Z2$L_Subtract_SD_X2 > -2), ] +X2_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z1$L_Subtract_SD_X1 < 2), ] +X2_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 > -2), ] +Overlap_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 >= 2), ] +Overlap_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 <= -2), ] +X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z2$L_Subtract_SD_X1 <= -2), ] +X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2), ] X$Overlap <- NA -try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg,]$Overlap <- paste(Name1,"Specific_Deletion_Suppressors",sep="_")) -try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg,]$Overlap <- paste(Name1,"Specific_Deletion_Enhancers",sep="_")) -try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg,]$Overlap <- paste(Name2,"Specific_Deletion_Suppressors",sep="_")) -try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg,]$Overlap <- paste(Name2,"Specific_Deletion_Enhancers",sep="_")) -try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg,]$Overlap <- "Overlapping_Deletion_Suppressors") -try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg,]$Overlap <- "Overlapping_Deletion_Enhancers") -try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg,]$Overlap <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_")) -try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg,]$Overlap <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_")) +try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg, ]$Overlap <- + paste(Name1, "Specific_Deletion_Suppressors", sep = "_")) +try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg, ]$Overlap <- + paste(Name1, "Specific_Deletion_Enhancers", sep = "_")) +try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg, ]$Overlap <- + paste(Name2, "Specific_Deletion_Suppressors", sep = "_")) +try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg, ]$Overlap <- + paste(Name2, "Specific_Deletion_Enhancers", sep = "_")) +try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg, ]$Overlap <- + "Overlapping_Deletion_Suppressors") +try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg, ]$Overlap <- + "Overlapping_Deletion_Enhancers") +try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg, ]$Overlap <- + paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_")) +try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg, ]$Overlap <- + paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_")) -X_abovethreshold <- X[!(is.na(X$Overlap)),] -gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) + - xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) + - ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) + +X_abovethreshold <- X[!(is.na(X$Overlap)), ] +gg <- ggplot(data = X_abovethreshold, aes( + x = Z_lm_K_Avg_X1, + y = Z_lm_K_Avg_X2, + color = Overlap, + Term = Term_Avg, + Genes = Genes_Avg_X1, + NumGenes = NumGenes_Avg_X1, + AllPossibleGenes = AllPossibleGenes_Avg_X1, + SD_1 = Z_lm_K_SD_X1, + SD_2 = Z_lm_K_SD_X2 +)) + + xlab(paste("GO Term Avg lm Z for ", Name1, sep = "")) + + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + + geom_point(shape = 3) + + ylab(paste("GO Term Avg lm Z for ", Name2, sep = "")) + + ggtitle(paste("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2, sep = "")) + theme_Publication_legend_right() -pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold.pdf",sep=""),width = 12,height = 8) + +pdf( + file.path(pairDirK, paste("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.pdf", sep = "")), + width = 12, + height = 8 +) + gg dev.off() pgg <- ggplotly(gg) #pgg -print("#5") -fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold.html",sep="") -print(fname) -htmlwidgets::saveWidget(pgg, file.path(getwd(),fname)) -file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname)) +fname <- file.path(pairDirK, paste("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.html", sep = "")) +htmlwidgets::saveWidget(pgg, fname) #6 -gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) + - xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) + - geom_text(aes(label=Term_Avg),nudge_y = 0.25,size=2) + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) + - ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) + +gg <- ggplot(data = X_abovethreshold, aes( + x = Z_lm_K_Avg_X1, + y = Z_lm_K_Avg_X2, + color = Overlap, + Term = Term_Avg, + Genes = Genes_Avg_X1, + NumGenes = NumGenes_Avg_X1, + AllPossibleGenes = AllPossibleGenes_Avg_X1, + SD_1 = Z_lm_K_SD_X1, + SD_2 = Z_lm_K_SD_X2 +)) + + xlab(paste("GO Term Avg lm Z for ", Name1, sep = "")) + + geom_text(aes(label = Term_Avg), nudge_y = 0.25, size = 2) + + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + + geom_point(shape = 3, size = 3) + + ylab(paste("GO Term Avg lm Z for ", Name2, sep = "")) + + ggtitle(paste("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2, sep = "")) + theme_Publication_legend_right() -pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.pdf",sep=""),width = 20,height = 20) + +pdf( + file.path(pairDirK, paste("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_names.pdf", sep = "")), + width = 20, + height = 20 +) gg dev.off() pgg <- ggplotly(gg) #pgg -print("#6") -fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.html",sep="") -print(fname) -htmlwidgets::saveWidget(pgg, file.path(getwd(),fname)) -file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname)) +fname <- file.path(pairDirK, paste("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_names.html", sep = "")) +htmlwidgets::saveWidget(pgg, fname) #7 X_abovethreshold$X1_Rank <- NA -X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_K_Avg_X1,ties.method = "random") +X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_K_Avg_X1, ties.method = "random") X_abovethreshold$X2_Rank <- NA -X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_K_Avg_X2,ties.method = "random") +X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_K_Avg_X2, ties.method = "random") - -gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) + - xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) + - geom_text(aes(label=X1_Rank),nudge_y = 0.25,size=4) + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) + - ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) + +gg <- ggplot(data = X_abovethreshold, aes( + x = Z_lm_K_Avg_X1, + y = Z_lm_K_Avg_X2, + color = Overlap, + Term = Term_Avg, + Genes = Genes_Avg_X1, + NumGenes = NumGenes_Avg_X1, + AllPossibleGenes = AllPossibleGenes_Avg_X1, + SD_1 = Z_lm_K_SD_X1, + SD_2 = Z_lm_K_SD_X2 +)) + + xlab(paste("GO Term Avg lm Z for ", Name1, sep = "")) + + geom_text(aes(label = X1_Rank), nudge_y = 0.25, size = 4) + + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + + geom_point(shape = 3, size = 3) + + ylab(paste("GO Term Avg lm Z for ", Name2, sep = "")) + + ggtitle(paste("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2, sep = "")) + theme_Publication_legend_right() -pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.pdf",sep=""),width = 15,height = 15) + +pdf( + file.path(pairDirK, paste("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX1.pdf", sep = "")), + width = 15, + height = 15 +) + gg dev.off() pgg <- ggplotly(gg) #pgg -print("#7") -fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.html",sep="") -print(fname) -htmlwidgets::saveWidget(pgg, file.path(getwd(),fname)) -file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname)) +fname <- + file.path(pairDirK, paste("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX1.html", sep = "")) +htmlwidgets::saveWidget(pgg, fname) #8 -gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) + - xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) + - geom_text(aes(label=X2_Rank),nudge_y = 0.25,size=4) + - geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) + - ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) + +gg <- ggplot(data = X_abovethreshold, aes( + x = Z_lm_K_Avg_X1, + y = Z_lm_K_Avg_X2, + color = Overlap, + Term = Term_Avg, + Genes = Genes_Avg_X1, + NumGenes = NumGenes_Avg_X1, + AllPossibleGenes = AllPossibleGenes_Avg_X1, + SD_1 = Z_lm_K_SD_X1, + SD_2 = Z_lm_K_SD_X2 +)) + + xlab(paste("GO Term Avg lm Z for ", Name1, sep = "")) + + geom_text(aes(label = X2_Rank), nudge_y = 0.25, size = 4) + + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + + geom_point(shape = 3, size = 3) + + ylab(paste("GO Term Avg lm Z for ", Name2, sep = "")) + + ggtitle(paste("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2, sep = "")) + theme_Publication_legend_right() -pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.pdf",sep=""),width = 15,height = 15) + +pdf( + file.path(pairDirK, paste("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX2.pdf", sep = "")), + width = 15, + height = 15 +) + gg dev.off() pgg <- ggplotly(gg) #pgg -print("#8") -fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.html",sep="") -print(fname) -htmlwidgets::saveWidget(pgg, file.path(getwd(),fname)) -file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname)) +fname <- + file.path(pairDirK, paste("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX2.html", sep = "")) +htmlwidgets::saveWidget(pgg, fname) -print("write csv files") -write.csv(x=X,file = paste(getwd(),"/",outputpath,"/","All_GTF_Avg_Scores_",Name1,"_vs_",Name2,".csv",sep=""),row.names = FALSE) -write.csv(x=X_abovethreshold,file = paste(getwd(),"/",outputpath,"/","AboveThreshold_GTF_Avg_Scores_",Name1,"_vs_",Name2,".csv",sep=""),row.names = FALSE) +write.csv( + x = X, + file = file.path(pairDirK, paste("All_GTF_Avg_Scores_", Name1, "_vs_", Name2, ".csv", sep = "")), + row.names = FALSE +) -#End of GTA Pairwise compare for K values +write.csv( + x = X_abovethreshold, + file = file.path(pairDirK, paste("AboveThreshold_GTF_Avg_Scores_", Name1, "_vs_", Name2, ".csv", sep = "")), + row.names = FALSE +) diff --git a/workflow/apps/r/TSHeatmaps5dev2.R b/workflow/apps/r/TSHeatmaps5dev2.R index e7478540..fa5db68f 100644 --- a/workflow/apps/r/TSHeatmaps5dev2.R +++ b/workflow/apps/r/TSHeatmaps5dev2.R @@ -36,659 +36,712 @@ base_dir <- args[6] output_dir <- args[7] study_nums <- args[8:length(args)] -#import standard tables used in Sean's code That should be copied to each ExpStudy -labels<- read.csv(file=study_info_file,stringsAsFactors = FALSE) -Ontology <- get_ontology(file=ontology_file,propagate_relationships = "is_a",extract_tags = "minimal") -GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS) #all ORFs associated with GO term -Terms <- read.delim(file=sgd_terms_tfile,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition")) -XX3 <- read.csv(file=all_sgd_terms_csv,stringsAsFactors=FALSE,header = TRUE) -XX3[,1] <- paste("GO:",formatC(XX3[,1],width=7,flag="0"),sep="") -XX3[,2] <- gsub(pattern = " ",replacement = "_",x = XX3[,2]) -XX3[,2] <- gsub(pattern = "/",replacement = "_",x = XX3[,2]) +# Import standard tables used in Sean's code That should be copied to each ExpStudy +labels <- read.csv(file = study_info_file, stringsAsFactors = FALSE) +Ontology <- get_ontology(file = ontology_file, propagate_relationships = "is_a", extract_tags = "minimal") +GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS) # all ORFs associated with GO term +Terms <- read.delim(file = sgd_terms_tfile, + header = FALSE, + quote = "", + col.names = c("GO_ID", "GO_Term", "GO_Aspect", "GO_Term_Definition") +) +XX3 <- read.csv(file = all_sgd_terms_csv, stringsAsFactors = FALSE, header = TRUE) +XX3[, 1] <- paste("GO:", formatC(XX3[, 1], width = 7, flag = "0"), sep = "") +XX3[, 2] <- gsub(pattern = " ", replacement = "_", x = XX3[, 2]) +XX3[, 2] <- gsub(pattern = "/", replacement = "_", x = XX3[, 2]) # Load input files for (study_num in study_nums) { - input_file <- file.path(base_dir,paste('Exp',study_num),zscores_file) + input_file <- file.path(base_dir, paste("Exp", study_num), zscores_file) if (file.exists(input_file)) { - assign(paste(X, study_num), read.csv(file=input_file,stringsAsFactors=FALSE,header=TRUE)) - assign(paste(Name, study_num), labels[study_num,2]) + assign(paste(X, study_num), read.csv(file = input_file, stringsAsFactors = FALSE, header = TRUE)) + assign(paste(Name, study_num), labels[study_num, 2]) } } for (study_num in study_nums) { - eval(paste("function",study_num)) + eval(paste("function", study_num)) } if (length(study_nums) > 0) { X1$ORF <- X1$OrfRep - X1$ORF <- gsub("_1","",x=X1$ORF) - X1$ORF <- gsub("_2","",x=X1$ORF) - X1$ORF <- gsub("_3","",x=X1$ORF) - X1$ORF <- gsub("_4","",x=X1$ORF) + X1$ORF <- gsub("_1", "", x = X1$ORF) + X1$ORF <- gsub("_2", "", x = X1$ORF) + X1$ORF <- gsub("_3", "", x = X1$ORF) + X1$ORF <- gsub("_4", "", x = X1$ORF) X1$Score_L <- "No Effect" - try(X1[is.na(X1$Z_lm_L),]$Score_L <- "No Growth") - try(X1[!is.na(X1$Z_lm_L) & X1$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer") - try(X1[!is.na(X1$Z_lm_L) & X1$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor") + try(X1[is.na(X1$Z_lm_L), ]$Score_L <- "No Growth") + try(X1[!is.na(X1$Z_lm_L) & X1$Z_lm_L >= 2, ]$Score_L <- "Deletion Enhancer") + try(X1[!is.na(X1$Z_lm_L) & X1$Z_lm_L <= -2, ]$Score_L <- "Deletion Suppressor") X1$Score_K <- "No Effect" - try(X1[is.na(X1$Z_lm_K),]$Score_K <- "No Growth") - try(X1[!is.na(X1$Z_lm_K) & X1$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor") - try(X1[!is.na(X1$Z_lm_K) & X1$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer") + try(X1[is.na(X1$Z_lm_K), ]$Score_K <- "No Growth") + try(X1[!is.na(X1$Z_lm_K) & X1$Z_lm_K >= 2, ]$Score_K <- "Deletion Suppressor") + try(X1[!is.na(X1$Z_lm_K) & X1$Z_lm_K <= -2, ]$Score_K <- "Deletion Enhancer") - #express the na data as 0.001 in X1 for K and L - X1[is.na(X1$Z_lm_L),]$Z_lm_L <- 0.001 - X1[is.na(X1$Z_lm_K),]$Z_lm_K <- 0.001 + # Express the na data as 0.001 in X1 for K and L + X1[is.na(X1$Z_lm_L), ]$Z_lm_L <- 0.001 + X1[is.na(X1$Z_lm_K), ]$Z_lm_K <- 0.001 X1$Rank_L <- rank(X1$Z_lm_L) X1$Rank_K <- rank(X1$Z_lm_K) - - X1 <- X1[order(X1$OrfRep,decreasing = FALSE),] - colnames(X1) <- paste(colnames(X1),"_X1",sep="") + X1 <- X1[order(X1$OrfRep, decreasing = FALSE), ] + colnames(X1) <- paste(colnames(X1), "_X1", sep = "") } if (length(study_nums) > 1) { X2$ORF <- X2$OrfRep - X2$ORF <- gsub("_1","",x=X2$ORF) - X2$ORF <- gsub("_2","",x=X2$ORF) - X2$ORF <- gsub("_3","",x=X2$ORF) - X2$ORF <- gsub("_4","",x=X2$ORF) + X2$ORF <- gsub("_1", "", x = X2$ORF) + X2$ORF <- gsub("_2", "", x = X2$ORF) + X2$ORF <- gsub("_3", "", x = X2$ORF) + X2$ORF <- gsub("_4", "", x = X2$ORF) X2$Score_L <- "No Effect" - try(X2[is.na(X2$Z_lm_L),]$Score_L <- "No Growth") - try(X2[!is.na(X2$Z_lm_L) & X2$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer") - try(X2[!is.na(X2$Z_lm_L) & X2$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor") + try(X2[is.na(X2$Z_lm_L), ]$Score_L <- "No Growth") + try(X2[!is.na(X2$Z_lm_L) & X2$Z_lm_L >= 2, ]$Score_L <- "Deletion Enhancer") + try(X2[!is.na(X2$Z_lm_L) & X2$Z_lm_L <= -2, ]$Score_L <- "Deletion Suppressor") X2$Score_K <- "No Effect" - try(X2[is.na(X2$Z_lm_K),]$Score_K <- "No Growth") - try(X2[!is.na(X2$Z_lm_K) & X2$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor") - try(X2[!is.na(X2$Z_lm_K) & X2$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer") + try(X2[is.na(X2$Z_lm_K), ]$Score_K <- "No Growth") + try(X2[!is.na(X2$Z_lm_K) & X2$Z_lm_K >= 2, ]$Score_K <- "Deletion Suppressor") + try(X2[!is.na(X2$Z_lm_K) & X2$Z_lm_K <= -2, ]$Score_K <- "Deletion Enhancer") #express the na data as 0.001 in X2 - X2[is.na(X2$Z_lm_L),]$Z_lm_L <- 0.001 - X2[is.na(X2$Z_lm_K),]$Z_lm_K <- 0.001 + X2[is.na(X2$Z_lm_L), ]$Z_lm_L <- 0.001 + X2[is.na(X2$Z_lm_K), ]$Z_lm_K <- 0.001 X2$Rank_L <- rank(X2$Z_lm_L) X2$Rank_K <- rank(X2$Z_lm_K) - X2 <- X2[order(X2$OrfRep,decreasing = FALSE),] - colnames(X2) <- paste(colnames(X2),"_X2",sep="") - X <- cbind(X1,X2) + X2 <- X2[order(X2$OrfRep, decreasing = FALSE), ] + colnames(X2) <- paste(colnames(X2), "_X2", sep = "") + X <- cbind(X1, X2) } if (length(study_nums) > 2) { X3$ORF <- X3$OrfRep - X3$ORF <- gsub("_1","",x=X3$ORF) - X3$ORF <- gsub("_2","",x=X3$ORF) - X3$ORF <- gsub("_3","",x=X3$ORF) - X3$ORF <- gsub("_4","",x=X3$ORF) + X3$ORF <- gsub("_1", "", x = X3$ORF) + X3$ORF <- gsub("_2", "", x = X3$ORF) + X3$ORF <- gsub("_3", "", x = X3$ORF) + X3$ORF <- gsub("_4", "", x = X3$ORF) X3$Score_L <- "No Effect" - try(X3[is.na(X3$Z_lm_L),]$Score_L <- "No Growth") - try(X3[!is.na(X3$Z_lm_L) & X3$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer") - try(X3[!is.na(X3$Z_lm_L) & X3$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor") + try(X3[is.na(X3$Z_lm_L), ]$Score_L <- "No Growth") + try(X3[!is.na(X3$Z_lm_L) & X3$Z_lm_L >= 2, ]$Score_L <- "Deletion Enhancer") + try(X3[!is.na(X3$Z_lm_L) & X3$Z_lm_L <= -2, ]$Score_L <- "Deletion Suppressor") X3$Score_K <- "No Effect" - try(X3[is.na(X3$Z_lm_K),]$Score_K <- "No Growth") - try(X3[!is.na(X3$Z_lm_K) & X3$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor") - try(X3[!is.na(X3$Z_lm_K) & X3$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer") - #express the na data as 0.001 in X3 - X3[is.na(X3$Z_lm_L),]$Z_lm_L <- 0.001 - X3[is.na(X3$Z_lm_K),]$Z_lm_K <- 0.001 + try(X3[is.na(X3$Z_lm_K), ]$Score_K <- "No Growth") + try(X3[!is.na(X3$Z_lm_K) & X3$Z_lm_K >= 2, ]$Score_K <- "Deletion Suppressor") + try(X3[!is.na(X3$Z_lm_K) & X3$Z_lm_K <= -2, ]$Score_K <- "Deletion Enhancer") + # Express the na data as 0.001 in X3 + X3[is.na(X3$Z_lm_L), ]$Z_lm_L <- 0.001 + X3[is.na(X3$Z_lm_K), ]$Z_lm_K <- 0.001 X3$Rank_L <- rank(X3$Z_lm_L) X3$Rank_K <- rank(X3$Z_lm_K) - - X3 <- X3[order(X3$OrfRep,decreasing = FALSE),] - colnames(X3) <- paste(colnames(X3),"_X3",sep="") - X <- cbind(X,X3) -} + X3 <- X3[order(X3$OrfRep, decreasing = FALSE), ] + colnames(X3) <- paste(colnames(X3), "_X3", sep = "") + X <- cbind(X, X3) +} if (length(study_nums) > 3) { X4$ORF <- X4$OrfRep - X4$ORF <- gsub("_1","",x=X4$ORF) - X4$ORF <- gsub("_2","",x=X4$ORF) - X4$ORF <- gsub("_3","",x=X4$ORF) - X4$ORF <- gsub("_4","",x=X4$ORF) + X4$ORF <- gsub("_1", "", x = X4$ORF) + X4$ORF <- gsub("_2", "", x = X4$ORF) + X4$ORF <- gsub("_3", "", x = X4$ORF) + X4$ORF <- gsub("_4", "", x = X4$ORF) X4$Score_L <- "No Effect" - try(X4[is.na(X4$Z_lm_L),]$Score_L <- "No Growth") - try(X4[!is.na(X4$Z_lm_L) & X4$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer") - try(X4[!is.na(X4$Z_lm_L) & X4$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor") + try(X4[is.na(X4$Z_lm_L), ]$Score_L <- "No Growth") + try(X4[!is.na(X4$Z_lm_L) & X4$Z_lm_L >= 2, ]$Score_L <- "Deletion Enhancer") + try(X4[!is.na(X4$Z_lm_L) & X4$Z_lm_L <= -2, ]$Score_L <- "Deletion Suppressor") X4$Score_K <- "No Effect" - try(X4[is.na(X4$Z_lm_K),]$Score_K <- "No Growth") - try(X4[!is.na(X4$Z_lm_K) & X4$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor") - try(X4[!is.na(X4$Z_lm_K) & X4$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer") - #express the na data as 0.001 in X4 - X4[is.na(X4$Z_lm_L),]$Z_lm_L <- 0.001 - X4[is.na(X4$Z_lm_K),]$Z_lm_K <- 0.001 + try(X4[is.na(X4$Z_lm_K), ]$Score_K <- "No Growth") + try(X4[!is.na(X4$Z_lm_K) & X4$Z_lm_K >= 2, ]$Score_K <- "Deletion Suppressor") + try(X4[!is.na(X4$Z_lm_K) & X4$Z_lm_K <= -2, ]$Score_K <- "Deletion Enhancer") + # Express the na data as 0.001 in X4 + X4[is.na(X4$Z_lm_L), ]$Z_lm_L <- 0.001 + X4[is.na(X4$Z_lm_K), ]$Z_lm_K <- 0.001 X4$Rank_L <- rank(X4$Z_lm_L) X4$Rank_K <- rank(X4$Z_lm_K) - - X4 <- X4[order(X4$OrfRep,decreasing = FALSE),] - colnames(X4) <- paste(colnames(X4),"_X4",sep="") - X <- cbind(X,X4) + X4 <- X4[order(X4$OrfRep, decreasing = FALSE), ] + colnames(X4) <- paste(colnames(X4), "_X4", sep = "") + X <- cbind(X, X4) } if (length(study_nums) > 4) { X5$ORF <- X5$OrfRep - X5$ORF <- gsub("_1","",x=X5$ORF) - X5$ORF <- gsub("_2","",x=X5$ORF) - X5$ORF <- gsub("_3","",x=X5$ORF) - X5$ORF <- gsub("_4","",x=X5$ORF) + X5$ORF <- gsub("_1", "", x = X5$ORF) + X5$ORF <- gsub("_2", "", x = X5$ORF) + X5$ORF <- gsub("_3", "", x = X5$ORF) + X5$ORF <- gsub("_4", "", x = X5$ORF) X5$Score_L <- "No Effect" - try(X5[is.na(X5$Z_lm_L),]$Score_L <- "No Growth") - try(X5[!is.na(X5$Z_lm_L) & X5$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer") - try(X5[!is.na(X5$Z_lm_L) & X5$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor") + try(X5[is.na(X5$Z_lm_L), ]$Score_L <- "No Growth") + try(X5[!is.na(X5$Z_lm_L) & X5$Z_lm_L >= 2, ]$Score_L <- "Deletion Enhancer") + try(X5[!is.na(X5$Z_lm_L) & X5$Z_lm_L <= -2, ]$Score_L <- "Deletion Suppressor") X5$Score_K <- "No Effect" - try(X5[is.na(X5$Z_lm_K),]$Score_K <- "No Growth") - try(X5[!is.na(X5$Z_lm_K) & X5$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor") - try(X5[!is.na(X5$Z_lm_K) & X5$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer") - #express the na data as 0.001 in X5 - X5[is.na(X5$Z_lm_L),]$Z_lm_L <- 0.001 - X5[is.na(X5$Z_lm_K),]$Z_lm_K <- 0.001 + try(X5[is.na(X5$Z_lm_K), ]$Score_K <- "No Growth") + try(X5[!is.na(X5$Z_lm_K) & X5$Z_lm_K >= 2, ]$Score_K <- "Deletion Suppressor") + try(X5[!is.na(X5$Z_lm_K) & X5$Z_lm_K <= -2, ]$Score_K <- "Deletion Enhancer") + # Express the na data as 0.001 in X5 + X5[is.na(X5$Z_lm_L), ]$Z_lm_L <- 0.001 + X5[is.na(X5$Z_lm_K), ]$Z_lm_K <- 0.001 X5$Rank_L <- rank(X5$Z_lm_L) X5$Rank_K <- rank(X5$Z_lm_K) - - X5 <- X5[order(X5$OrfRep,decreasing = FALSE),] - colnames(X5) <- paste(colnames(X5),"_X5",sep="") - X <- cbind(X,X5) + X5 <- X5[order(X5$OrfRep, decreasing = FALSE), ] + colnames(X5) <- paste(colnames(X5), "_X5", sep = "") + X <- cbind(X, X5) } X$ORF <- X$OrfRep_X1 if (length(study_nums) > 1) { - - X$ORF <- gsub("_1","",x=X$ORF) + X$ORF <- gsub("_1", "", x = X$ORF) + try(X[X$Gene_X1 == "", ]$Gene_X1 <- X[X$Gene_X1 == "", ]$OrfRep_X1) + try(X[X$Gene_X2 == "", ]$Gene_X2 <- X[X$Gene_X2 == "", ]$OrfRep_X2) + X_heatmap <- + X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" | + colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" | + colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" | + colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" | + colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2"] - try(X[X$Gene_X1 == "",]$Gene_X1 <- X[X$Gene_X1 == "",]$OrfRep_X1) - try(X[X$Gene_X2 == "",]$Gene_X2 <- X[X$Gene_X2 == "",]$OrfRep_X2) - X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" | - colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" | - colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" | - colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" | - colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" ] - - X_heatmap <- X_heatmap[,c(10,1,4,5,8,9,2,3,6,7)] - colnames(X_heatmap) <- gsub(pattern = "X1",replacement = Name1,colnames(X_heatmap)) - colnames(X_heatmap) <- gsub(pattern = "X2",replacement = Name2,colnames(X_heatmap)) + X_heatmap <- X_heatmap[, c(10, 1, 4, 5, 8, 9, 2, 3, 6, 7)] + colnames(X_heatmap) <- gsub(pattern = "X1", replacement = Name1, colnames(X_heatmap)) + colnames(X_heatmap) <- gsub(pattern = "X2", replacement = Name2, colnames(X_heatmap)) colnames(X_heatmap)[2] <- "Gene" } - if (length(study_nums) > 2) { - X$ORF <- gsub("_1","",x=X$ORF) - X$ORF <- gsub("_2","",x=X$ORF) + X$ORF <- gsub("_1", "", x = X$ORF) + X$ORF <- gsub("_2", "", x = X$ORF) + try(X[X$Gene_X1 == "", ]$Gene_X1 <- X[X$Gene_X1 == "", ]$OrfRep_X1) + try(X[X$Gene_X2 == "", ]$Gene_X2 <- X[X$Gene_X2 == "", ]$OrfRep_X2) + try(X[X$Gene_X3 == "", ]$Gene_X3 <- X[X$Gene_X3 == "", ]$OrfRep_X3) - try(X[X$Gene_X1 == "",]$Gene_X1 <- X[X$Gene_X1 == "",]$OrfRep_X1) - try(X[X$Gene_X2 == "",]$Gene_X2 <- X[X$Gene_X2 == "",]$OrfRep_X2) - try(X[X$Gene_X3 == "",]$Gene_X3 <- X[X$Gene_X3 == "",]$OrfRep_X3) - - X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" | - colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" | - colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" | - colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" | - colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" | - colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" | - colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" ] - #Reorder columns - X_heatmap <- X_heatmap[,c(14,1,4,5,8,9,12,13,2,3,6,7,10,11)] #Three - colnames(X_heatmap) <- gsub(pattern = "X1",replacement = Name1,colnames(X_heatmap)) - colnames(X_heatmap) <- gsub(pattern = "X2",replacement = Name2,colnames(X_heatmap)) - colnames(X_heatmap) <- gsub(pattern = "X3",replacement = Name3,colnames(X_heatmap)) + X_heatmap <- + X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" | + colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" | + colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" | + colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" | + colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" | + colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" | + colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3"] + + # Reorder columns + X_heatmap <- X_heatmap[, c(14, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11)] + colnames(X_heatmap) <- gsub(pattern = "X1", replacement = Name1, colnames(X_heatmap)) + colnames(X_heatmap) <- gsub(pattern = "X2", replacement = Name2, colnames(X_heatmap)) + colnames(X_heatmap) <- gsub(pattern = "X3", replacement = Name3, colnames(X_heatmap)) colnames(X_heatmap)[2] <- "Gene" } if (length(study_nums) > 3) { - X$ORF <- gsub("_1","",x=X$ORF) - X$ORF <- gsub("_2","",x=X$ORF) - X$ORF <- gsub("_3","",x=X$ORF) + X$ORF <- gsub("_1", "", x = X$ORF) + X$ORF <- gsub("_2", "", x = X$ORF) + X$ORF <- gsub("_3", "", x = X$ORF) + try(X[X$Gene_X1 == "", ]$Gene_X1 <- X[X$Gene_X1 == "", ]$OrfRep_X1) + try(X[X$Gene_X2 == "", ]$Gene_X2 <- X[X$Gene_X2 == "", ]$OrfRep_X2) + try(X[X$Gene_X3 == "", ]$Gene_X3 <- X[X$Gene_X3 == "", ]$OrfRep_X3) + try(X[X$Gene_X4 == "", ]$Gene_X4 <- X[X$Gene_X4 == "", ]$OrfRep_X4) - try(X[X$Gene_X1 == "",]$Gene_X1 <- X[X$Gene_X1 == "",]$OrfRep_X1) - try(X[X$Gene_X2 == "",]$Gene_X2 <- X[X$Gene_X2 == "",]$OrfRep_X2) - try(X[X$Gene_X3 == "",]$Gene_X3 <- X[X$Gene_X3 == "",]$OrfRep_X3) - try(X[X$Gene_X4 == "",]$Gene_X4 <- X[X$Gene_X4 == "",]$OrfRep_X4) - - X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" | - colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" | - colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" | - colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" | - colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" | - colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" | - colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" | - colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" | - colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4" ] - #Reorder columns - X_heatmap <- X_heatmap[,c(18,1,4,5,8,9,12,13,16,17,2,3,6,7,10,11,14,15)] #Four - colnames(X_heatmap) <- gsub(pattern = "X1",replacement = Name1,colnames(X_heatmap)) - colnames(X_heatmap) <- gsub(pattern = "X2",replacement = Name2,colnames(X_heatmap)) - colnames(X_heatmap) <- gsub(pattern = "X3",replacement = Name3,colnames(X_heatmap)) - colnames(X_heatmap) <- gsub(pattern = "X4",replacement = Name4,colnames(X_heatmap)) + X_heatmap <- + X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" | + colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" | + colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" | + colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" | + colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" | + colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" | + colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" | + colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" | + colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4"] + + # Reorder columns + X_heatmap <- X_heatmap[, c(18, 1, 4, 5, 8, 9, 12, 13, 16, 17, 2, 3, 6, 7, 10, 11, 14, 15)] + colnames(X_heatmap) <- gsub(pattern = "X1", replacement = Name1, colnames(X_heatmap)) + colnames(X_heatmap) <- gsub(pattern = "X2", replacement = Name2, colnames(X_heatmap)) + colnames(X_heatmap) <- gsub(pattern = "X3", replacement = Name3, colnames(X_heatmap)) + colnames(X_heatmap) <- gsub(pattern = "X4", replacement = Name4, colnames(X_heatmap)) colnames(X_heatmap)[2] <- "Gene" } - if (length(study_nums) > 4) { - X$ORF <- gsub("_1","",x=X$ORF) - X$ORF <- gsub("_2","",x=X$ORF) - X$ORF <- gsub("_3","",x=X$ORF) - X$ORF <- gsub("_4","",x=X$ORF) - - try(X[X$Gene_X1 == "",]$Gene_X1 <- X[X$Gene_X1 == "",]$OrfRep_X1) - try(X[X$Gene_X2 == "",]$Gene_X2 <- X[X$Gene_X2 == "",]$OrfRep_X2) - try(X[X$Gene_X3 == "",]$Gene_X3 <- X[X$Gene_X3 == "",]$OrfRep_X3) - try(X[X$Gene_X4 == "",]$Gene_X4 <- X[X$Gene_X4 == "",]$OrfRep_X4) - try(X[X$Gene_X5 == "",]$Gene_X5 <- X[X$Gene_X5 == "",]$OrfRep_X5) - - X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" | - colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" | - colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" | - colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" | - colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" | - colnames(X) == "Z_Shift_K_X5" | colnames(X) == "Z_lm_K_X5" | - colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" | - colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" | - colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" | - colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4" | - colnames(X) == "Z_Shift_L_X5" | colnames(X) == "Z_lm_L_X5"] - #Reorder columns - X_heatmap <- X_heatmap[,c(22,1,4,5,8,9,12,13,16,17,20,21,2,3,6,7,10,11,14,15,18,19)] - colnames(X_heatmap) <- gsub(pattern = "X1",replacement = Name1,colnames(X_heatmap)) - colnames(X_heatmap) <- gsub(pattern = "X2",replacement = Name2,colnames(X_heatmap)) - colnames(X_heatmap) <- gsub(pattern = "X3",replacement = Name3,colnames(X_heatmap)) - colnames(X_heatmap) <- gsub(pattern = "X4",replacement = Name4,colnames(X_heatmap)) - colnames(X_heatmap) <- gsub(pattern = "X5",replacement = Name5,colnames(X_heatmap)) + X$ORF <- gsub("_1", "", x = X$ORF) + X$ORF <- gsub("_2", "", x = X$ORF) + X$ORF <- gsub("_3", "", x = X$ORF) + X$ORF <- gsub("_4", "", x = X$ORF) + try(X[X$Gene_X1 == "", ]$Gene_X1 <- X[X$Gene_X1 == "", ]$OrfRep_X1) + try(X[X$Gene_X2 == "", ]$Gene_X2 <- X[X$Gene_X2 == "", ]$OrfRep_X2) + try(X[X$Gene_X3 == "", ]$Gene_X3 <- X[X$Gene_X3 == "", ]$OrfRep_X3) + try(X[X$Gene_X4 == "", ]$Gene_X4 <- X[X$Gene_X4 == "", ]$OrfRep_X4) + try(X[X$Gene_X5 == "", ]$Gene_X5 <- X[X$Gene_X5 == "", ]$OrfRep_X5) + X_heatmap <- + X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" | + colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" | + colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" | + colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" | + colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" | + colnames(X) == "Z_Shift_K_X5" | colnames(X) == "Z_lm_K_X5" | + colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" | + colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" | + colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" | + colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4" | + colnames(X) == "Z_Shift_L_X5" | colnames(X) == "Z_lm_L_X5"] + + # Reorder columns + X_heatmap <- X_heatmap[, c(22, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 2, 3, 6, 7, 10, 11, 14, 15, 18, 19)] + colnames(X_heatmap) <- gsub(pattern = "X1", replacement = Name1, colnames(X_heatmap)) + colnames(X_heatmap) <- gsub(pattern = "X2", replacement = Name2, colnames(X_heatmap)) + colnames(X_heatmap) <- gsub(pattern = "X3", replacement = Name3, colnames(X_heatmap)) + colnames(X_heatmap) <- gsub(pattern = "X4", replacement = Name4, colnames(X_heatmap)) + colnames(X_heatmap) <- gsub(pattern = "X5", replacement = Name5, colnames(X_heatmap)) colnames(X_heatmap)[2] <- "Gene" } - -#theme elements for plots -theme_Publication <- function(base_size=14, base_family="sans") { - (theme_foundation(base_size=base_size, base_family=base_family) - + theme(plot.title = element_text(face = "bold", - size = rel(1.2), hjust = 0.5), - text = element_text(), - panel.background = element_rect(colour = NA), - plot.background = element_rect(colour = NA), - panel.border = element_rect(colour = NA), - axis.title = element_text(face = "bold",size = rel(1)), - axis.title.y = element_text(angle=90,vjust =2), - axis.title.x = element_text(vjust = -0.2), - axis.text = element_text(), - axis.line = element_line(colour="black"), - axis.ticks = element_line(), - panel.grid.major = element_line(colour="#f0f0f0"), - panel.grid.minor = element_blank(), - legend.key = element_rect(colour = NA), - legend.position = "bottom", - legend.direction = "horizontal", - legend.key.size= unit(0.2, "cm"), - legend.spacing = unit(0, "cm"), - legend.title = element_text(face="italic"), - plot.margin=unit(c(10,5,5,5),"mm"), - strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"), - strip.text = element_text(face="bold") - )) - +# Theme elements for plots +theme_Publication <- function(base_size = 14, base_family = "sans") { + (theme_foundation(base_size = base_size, base_family = base_family) + + theme( + plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5), + text = element_text(), + panel.background = element_rect(colour = NA), + plot.background = element_rect(colour = NA), + panel.border = element_rect(colour = NA), + axis.title = element_text(face = "bold", size = rel(1)), + axis.title.y = element_text(angle = 90, vjust = 2), + axis.title.x = element_text(vjust = -0.2), + axis.text = element_text(), + axis.line = element_line(colour = "black"), + axis.ticks = element_line(), + panel.grid.major = element_line(colour = "#f0f0f0"), + panel.grid.minor = element_blank(), + legend.key = element_rect(colour = NA), + legend.position = "bottom", + legend.direction = "horizontal", + legend.key.size = unit(0.2, "cm"), + legend.spacing = unit(0, "cm"), + legend.title = element_text(face = "italic"), + plot.margin = unit(c(10, 5, 5, 5), "mm"), + strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"), + strip.text = element_text(face = "bold") + ) + ) } -scale_fill_Publication <- function(...){ +scale_fill_Publication <- function(...) { library(scales) - discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - + discrete_scale( + "fill", + "Publication", + manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), + ... + ) } -scale_colour_Publication <- function(...){ - discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - +scale_colour_Publication <- function(...) { + discrete_scale( + "colour", + "Publication", + manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), + ... + ) } - -theme_Publication_legend_right <- function(base_size=14, base_family="sans") { - (theme_foundation(base_size=base_size, base_family=base_family) - + theme(plot.title = element_text(face = "bold", - size = rel(1.2), hjust = 0.5), - text = element_text(), - panel.background = element_rect(colour = NA), - plot.background = element_rect(colour = NA), - panel.border = element_rect(colour = NA), - axis.title = element_text(face = "bold",size = rel(1)), - axis.title.y = element_text(angle=90,vjust =2), - axis.title.x = element_text(vjust = -0.2), - axis.text = element_text(), - axis.line = element_line(colour="black"), - axis.ticks = element_line(), - panel.grid.major = element_line(colour="#f0f0f0"), - panel.grid.minor = element_blank(), - legend.key = element_rect(colour = NA), - legend.position = "right", - legend.direction = "vertical", - legend.key.size= unit(0.5, "cm"), - legend.spacing = unit(0, "cm"), - legend.title = element_text(face="italic"), - plot.margin=unit(c(10,5,5,5),"mm"), - strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"), - strip.text = element_text(face="bold") - )) - +theme_Publication_legend_right <- function(base_size = 14, base_family = "sans") { + (theme_foundation(base_size = base_size, base_family = base_family) + + theme( + plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5), + text = element_text(), + panel.background = element_rect(colour = NA), + plot.background = element_rect(colour = NA), + panel.border = element_rect(colour = NA), + axis.title = element_text(face = "bold", size = rel(1)), + axis.title.y = element_text(angle = 90, vjust = 2), + axis.title.x = element_text(vjust = -0.2), + axis.text = element_text(), + axis.line = element_line(colour = "black"), + axis.ticks = element_line(), + panel.grid.major = element_line(colour = "#f0f0f0"), + panel.grid.minor = element_blank(), + legend.key = element_rect(colour = NA), + legend.position = "right", + legend.direction = "vertical", + legend.key.size = unit(0.5, "cm"), + legend.spacing = unit(0, "cm"), + legend.title = element_text(face = "italic"), + plot.margin = unit(c(10, 5, 5, 5), "mm"), + strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"), + strip.text = element_text(face = "bold") + ) + ) } -scale_fill_Publication <- function(...){ - discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - +scale_fill_Publication <- function(...) { + discrete_scale( + "fill", + "Publication", + manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), + ... + ) } -scale_colour_Publication <- function(...){ - discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...) - +scale_colour_Publication <- function(...) { + discrete_scale( + "colour", + "Publication", + manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), + ... + ) } - -Ontology <- get_ontology(file=ontology_file,propagate_relationships = "is_a",extract_tags = "minimal") +Ontology <- get_ontology(file = ontology_file, propagate_relationships = "is_a", extract_tags = "minimal") print(Ontology) -#all ORFs associated with GO term + +# All ORFs associated with GO term GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS) -#Terms is the GO term list jwr moved up to TAABLES -Terms <- read.delim(file=sgd_terms_tfile,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition")) +# Terms is the GO term list jwr moved up to TAABLES +Terms <- read.delim(file = sgd_terms_tfile, + header = FALSE, + quote = "", + col.names = c("GO_ID", "GO_Term", "GO_Aspect", "GO_Term_Definition") +) +colormapbreaks <- c(-12, -10, -8, -6, -4, -2, 2, 4, 6, 8, 10, 12) -#BIG LOOP BIG LOOP ------------------------------------------------------ -colormapbreaks <- c(-12,-10,-8,-6,-4,-2,2,4,6,8,10,12) -for(s in 1:dim(XX3)[1]){ - #Ontology <- get_ontology(file="Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",propagate_relationships = "is_a",extract_tags = "minimal") - #Ontology_Everything <- get_ontology(file="Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",propagate_relationships = "is_a",extract_tags = "everything") - #GO_ID_Arg <- "GO:0006325" - GO_ID_Arg_loop <- as.character(XX3[s,1]) - GOTerm_parent <- get_descendants(Ontology,roots = GO_ID_Arg_loop) - #GOTerm_parent <- get_descendants(Ontology,roots = "GO:0006325") - #only make plots if parent term has fewer than 500 children - +for (s in 1:dim(XX3)[1]) { + # Ontology <- + # get_ontology(file = "Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo", + # propagate_relationships = "is_a", extract_tags = "minimal") + # Ontology_Everything <- + # get_ontology(file = "Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo", + # propagate_relationships = "is_a", extract_tags = "everything") + # + # GO_ID_Arg <- "GO:0006325" + GO_ID_Arg_loop <- as.character(XX3[s, 1]) + GOTerm_parent <- get_descendants(Ontology, roots = GO_ID_Arg_loop) + # GOTerm_parent <- get_descendants(Ontology,roots = "GO:0006325") + # Only make plots if parent term has fewer than 500 children Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]])) - if(length(GOTerm_parent) > 100){ + if (length(GOTerm_parent) > 100) { next() } - Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]])) - if(Parent_Size < 2){ + Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]])) # TODO why twice? + + if (Parent_Size < 2) { next() } - if(Parent_Size > 2000){ - pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 45, onefile = TRUE) - for(i in 1:length(GOTerm_parent)){ + + if (Parent_Size > 2000) { + pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 45, onefile = TRUE) + for (i in 1:length(GOTerm_parent)) { GO_Term <- GOTerm_parent[i] - GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2]) - GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term) - #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,] + GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2]) + GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term) + # Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term, ] All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]]) - Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,] - X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]]) - if(dim(Genes_Annotated_to_Term)[1] > 2){ - try(heatmap.2(x=X0, - Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust, - dendrogram = "row", cexCol = 0.7, cexRow = 0.5, scale = "none", - breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1, - ylab = "Gene", - cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE, - keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8), - na.color="red", col=brewer.pal(11,"PuOr"), - main=GO_Term_Name, - #ColSideColors=ev_repeat, - labRow=as.character(Genes_Annotated_to_Term$Gene))) + Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ] + X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]]) + if (dim(Genes_Annotated_to_Term)[1] > 2) { + try(heatmap.2( + x = X0, + Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust, + dendrogram = "row", cexCol = 0.7, cexRow = 0.5, scale = "none", + breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1, + ylab = "Gene", + cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE, + keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8), + na.color = "red", col = brewer.pal(11, "PuOr"), + main = GO_Term_Name, + #ColSideColors = ev_repeat, + labRow = as.character(Genes_Annotated_to_Term$Gene) + )) } } dev.off() } - if(Parent_Size >= 1000 && Parent_Size <= 2000){ - pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 35, onefile = TRUE) - for(i in 1:length(GOTerm_parent)){ + if (Parent_Size >= 1000 && Parent_Size <= 2000) { + pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 35, onefile = TRUE) + for (i in 1:length(GOTerm_parent)) { GO_Term <- GOTerm_parent[i] - GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2]) - GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term) - #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,] + GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2]) + GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term) + # Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term, ] All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]]) - Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,] - X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]]) - if(dim(Genes_Annotated_to_Term)[1] > 2){ - try(heatmap.2(x=X0, - Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust, - dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none", - breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1, - ylab = "Gene", - cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE, - keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8), - na.color="red", col=brewer.pal(11,"PuOr"), - main=GO_Term_Name, - #ColSideColors=ev_repeat, - labRow=as.character(Genes_Annotated_to_Term$Gene))) + Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ] + X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]]) + if (dim(Genes_Annotated_to_Term)[1] > 2) { + try(heatmap.2( + x = X0, + Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust, + dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none", + breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1, + ylab = "Gene", + cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE, + keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8), + na.color = "red", col = brewer.pal(11, "PuOr"), + main = GO_Term_Name, + #ColSideColors = ev_repeat, + labRow = as.character(Genes_Annotated_to_Term$Gene) + )) } } dev.off() } - if(Parent_Size >= 500 && Parent_Size <= 1000){ - pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 30, onefile = TRUE) - for(i in 1:length(GOTerm_parent)){ + + if (Parent_Size >= 500 && Parent_Size <= 1000) { + pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 30, onefile = TRUE) + for (i in 1:length(GOTerm_parent)) { GO_Term <- GOTerm_parent[i] - GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2]) - GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term) - #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,] + GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2]) + GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term) + # Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term, ] All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]]) - Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,] - X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]]) - if(dim(Genes_Annotated_to_Term)[1] > 2){ - try(heatmap.2(x=X0, - Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust, - dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none", - breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1, - ylab = "Gene", - cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE, - keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8), - na.color="red", col=brewer.pal(11,"PuOr"), - main=GO_Term_Name, - #ColSideColors=ev_repeat, - labRow=as.character(Genes_Annotated_to_Term$Gene))) + Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ] + X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]]) + if (dim(Genes_Annotated_to_Term)[1] > 2) { + try(heatmap.2( + x = X0, + Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust, + dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none", + breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1, + ylab = "Gene", + cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE, + keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8), + na.color = "red", col = brewer.pal(11, "PuOr"), + main = GO_Term_Name, + #ColSideColors = ev_repeat, + labRow = as.character(Genes_Annotated_to_Term$Gene) + )) } } dev.off() } - if(Parent_Size >= 200 && Parent_Size <= 500){ - pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 25, onefile = TRUE) - for(i in 1:length(GOTerm_parent)){ + + if (Parent_Size >= 200 && Parent_Size <= 500) { + pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 25, onefile = TRUE) + for (i in 1:length(GOTerm_parent)) { GO_Term <- GOTerm_parent[i] - GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2]) - GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term) - #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,] + GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2]) + GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term) + # Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term, ] All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]]) - Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,] - X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]]) - if(dim(Genes_Annotated_to_Term)[1] > 2){ - try(heatmap.2(x=X0, - Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust, - dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none", - breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1, - ylab = "Gene", - cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE, - keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8), - na.color="red", col=brewer.pal(11,"PuOr"), - main=GO_Term_Name, - #ColSideColors=ev_repeat, - labRow=as.character(Genes_Annotated_to_Term$Gene))) + Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ] + X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]]) + if (dim(Genes_Annotated_to_Term)[1] > 2) { + try(heatmap.2( + x = X0, + Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust, + dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none", + breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1, + ylab = "Gene", + cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE, + keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8), + na.color = "red", col = brewer.pal(11, "PuOr"), + main = GO_Term_Name, + #ColSideColors = ev_repeat, + labRow = as.character(Genes_Annotated_to_Term$Gene) + )) } } dev.off() } - if(Parent_Size >= 100 && Parent_Size <= 200){ - pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 20, onefile = TRUE) - for(i in 1:length(GOTerm_parent)){ + + if (Parent_Size >= 100 && Parent_Size <= 200) { + pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 20, onefile = TRUE) + for (i in 1:length(GOTerm_parent)) { GO_Term <- GOTerm_parent[i] - GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2]) - GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term) - #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,] + GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2]) + GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term) + # Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term, ] All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]]) - Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,] - X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]]) - if(dim(Genes_Annotated_to_Term)[1] > 2){ - try(heatmap.2(x=X0, - Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust, - dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none", - breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1, - ylab = "Gene", - cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE, - keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8), - na.color="red", col=brewer.pal(11,"PuOr"), - main=GO_Term_Name, - #ColSideColors=ev_repeat, - labRow=as.character(Genes_Annotated_to_Term$Gene))) + Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ] + X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]]) + if (dim(Genes_Annotated_to_Term)[1] > 2) { + try(heatmap.2( + x = X0, + Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust, + dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none", + breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1, + ylab = "Gene", + cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE, + keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8), + na.color = "red", col = brewer.pal(11, "PuOr"), + main = GO_Term_Name, + #ColSideColors = ev_repeat, + labRow = as.character(Genes_Annotated_to_Term$Gene) + )) } } dev.off() } - if(Parent_Size >= 60 && Parent_Size <= 100){ - pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 15, onefile = TRUE) - for(i in 1:length(GOTerm_parent)){ + + if (Parent_Size >= 60 && Parent_Size <= 100) { + pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 15, onefile = TRUE) + for (i in 1:length(GOTerm_parent)) { GO_Term <- GOTerm_parent[i] - GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2]) - GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term) - #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,] + GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2]) + GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term) + # Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term, ] All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]]) - Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,] - X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]]) - if(dim(Genes_Annotated_to_Term)[1] > 2){ - try(heatmap.2(x=X0, - Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust, - dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none", - breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1, - ylab = "Gene", - cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE, - keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8), - na.color="red", col=brewer.pal(11,"PuOr"), - main=GO_Term_Name, - #ColSideColors=ev_repeat, - labRow=as.character(Genes_Annotated_to_Term$Gene))) + Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ] + X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]]) + if (dim(Genes_Annotated_to_Term)[1] > 2) { + try(heatmap.2( + x = X0, + Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust, + dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none", + breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1, + ylab = "Gene", + cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE, + keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8), + na.color = "red", col = brewer.pal(11, "PuOr"), + main = GO_Term_Name, + #ColSideColors = ev_repeat, + labRow = as.character(Genes_Annotated_to_Term$Gene) + )) } } dev.off() } - if(Parent_Size >= 30 && Parent_Size <= 60){ - pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 10, onefile = TRUE) - for(i in 1:length(GOTerm_parent)){ + + if (Parent_Size >= 30 && Parent_Size <= 60) { + pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 10, onefile = TRUE) + for (i in 1:length(GOTerm_parent)) { GO_Term <- GOTerm_parent[i] - GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2]) - GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term) - #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,] + GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2]) + GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term) + # Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term, ] All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]]) - Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,] - X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]]) - if(dim(Genes_Annotated_to_Term)[1] > 2){ - try(heatmap.2(x=X0, - Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust, - dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none", - breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1, - ylab = "Gene", - cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE, - keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8), - na.color="red", col=brewer.pal(11,"PuOr"), - main=GO_Term_Name, - #ColSideColors=ev_repeat, - labRow=as.character(Genes_Annotated_to_Term$Gene))) + Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ] + X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]]) + if (dim(Genes_Annotated_to_Term)[1] > 2) { + try(heatmap.2( + x = X0, + Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust, + dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none", + breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1, + ylab = "Gene", + cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE, + keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8), + na.color = "red", col = brewer.pal(11, "PuOr"), + main = GO_Term_Name, + # ColSideColors = ev_repeat, + labRow = as.character(Genes_Annotated_to_Term$Gene) + )) } - if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){ - try(heatmap.2(x=X0, - Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust, - dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none", - breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1, - ylab = "Gene", - cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE, - keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8), - na.color="red", col=brewer.pal(11,"PuOr"), - main=GO_Term_Name, - #ColSideColors=ev_repeat, - labRow=as.character(Genes_Annotated_to_Term$Gene))) + if (dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0) { + try(heatmap.2( + x = X0, + Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust, + dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none", + breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1, + ylab = "Gene", + cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE, + keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8), + na.color = "red", col = brewer.pal(11, "PuOr"), + main = GO_Term_Name, + #ColSideColors = ev_repeat, + labRow = as.character(Genes_Annotated_to_Term$Gene) + )) } } dev.off() } - if(Parent_Size >= 3 && Parent_Size <= 30){ - pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE) - for(i in 1:length(GOTerm_parent)){ + + if (Parent_Size >= 3 && Parent_Size <= 30) { + pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 7, onefile = TRUE) + for (i in 1:length(GOTerm_parent)) { GO_Term <- GOTerm_parent[i] - GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2]) - GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term) - #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,] + GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2]) + GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term) + # Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term, ] All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]]) - Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,] - X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]]) - if(dim(Genes_Annotated_to_Term)[1] > 2){ - try(heatmap.2(x=X0, - Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust, - dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none", - breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1, - ylab = "Gene", - cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE, - keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8), - na.color="red", col=brewer.pal(11,"PuOr"), - main=GO_Term_Name, - #ColSideColors=ev_repeat, - labRow=as.character(Genes_Annotated_to_Term$Gene))) + Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ] + X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]]) + if (dim(Genes_Annotated_to_Term)[1] > 2) { + try(heatmap.2( + x = X0, + Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust, + dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none", + breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1, + ylab = "Gene", + cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE, + keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8), + na.color = "red", col = brewer.pal(11, "PuOr"), + main = GO_Term_Name, + # ColSideColors = ev_repeat, + labRow = as.character(Genes_Annotated_to_Term$Gene) + )) } - if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){ - try(heatmap.2(x=X0, - Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust, - dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none", - breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1, - ylab = "Gene", - cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE, - keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8), - na.color="red", col=brewer.pal(11,"PuOr"), - main=GO_Term_Name, - #ColSideColors=ev_repeat, - labRow=as.character(Genes_Annotated_to_Term$Gene))) + if (dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0) { + try(heatmap.2( + x = X0, + Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust, + dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none", + breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1, + ylab = "Gene", + cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE, + keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8), + na.color = "red", col = brewer.pal(11, "PuOr"), + main = GO_Term_Name, + # ColSideColors = ev_repeat, + labRow = as.character(Genes_Annotated_to_Term$Gene) + )) } } dev.off() } - if(Parent_Size == 2){ - pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE) - for(i in 1:length(GOTerm_parent)){ + + if (Parent_Size == 2) { + pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 7, onefile = TRUE) + for (i in 1:length(GOTerm_parent)) { GO_Term <- GOTerm_parent[i] - GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2]) - GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term) - #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,] + GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2]) + GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term) + # Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term, ] All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]]) - Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,] - X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]]) - if(dim(Genes_Annotated_to_Term)[1] > 2){ - try(heatmap.2(x=X0, - Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust, - dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none", - breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1, - ylab = "Gene", - cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE, - keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8), - na.color="red", col=brewer.pal(11,"PuOr"), - main=GO_Term_Name, - #ColSideColors=ev_repeat, - labRow=as.character(Genes_Annotated_to_Term$Gene))) + Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ] + X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]]) + if (dim(Genes_Annotated_to_Term)[1] > 2) { + try(heatmap.2( + x = X0, + Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust, + dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none", + breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1, + ylab = "Gene", + cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE, + keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8), + na.color = "red", col = brewer.pal(11, "PuOr"), + main = GO_Term_Name, + # ColSideColors = ev_repeat, + labRow = as.character(Genes_Annotated_to_Term$Gene) + )) } - if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){ - try(heatmap.2(x=X0, - Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust, - dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none", - breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1, - ylab = "Gene", - cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE, - keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8), - na.color="red", col=brewer.pal(11,"PuOr"), - main=GO_Term_Name, - #ColSideColors=ev_repeat, - labRow=as.character(Genes_Annotated_to_Term$Gene))) + if (dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0) { + try(heatmap.2( + x = X0, + Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust, + dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none", + breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1, + ylab = "Gene", + cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE, + keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8), + na.color = "red", col = brewer.pal(11, "PuOr"), + main = GO_Term_Name, + # ColSideColors = ev_repeat, + labRow = as.character(Genes_Annotated_to_Term$Gene) + )) } } dev.off() diff --git a/workflow/apps/r/gtaTemplate.R b/workflow/apps/r/gtaTemplate.R index ae0eaf3b..af1db83c 100644 --- a/workflow/apps/r/gtaTemplate.R +++ b/workflow/apps/r/gtaTemplate.R @@ -1,9 +1,9 @@ -#!/usr/bin/env R +#!/usr/bin/env R # GTA (GoTermAveraging) # Your output may not be reproducible as org.Sc.sgd.db is uploaded from Bioconductor R library and changes # # Updated 240724 Bryan C Roessler to improve file operations and portability -# NOTE: The script now has 2 additional OPTIONAL arguments: +# NOTE: The script now has 2 additional OPTIONAL arguments: # 1. Path to SGD terms file (go.terms.tab) # 2. Path to SGD features file (gene_association.sgd) @@ -40,160 +40,157 @@ if (length(args) >= 5) { output_dir <- "../../out/gta" # https://downloads.yeastgenome.org/curation/chromosomal_feature/gene_association.sgd } - - # # Set SGDgeneList file path # if (length(args) > 4) { # SGDgeneList <- args[4] # } else { # SGDgeneList <- "../Code/SGD_features.tab" - -#Begin for loop for experiments in this study-----------------ZScores_Interaction.csv -for(m in 1:length(zscores_file)){ +# Begin for loop for experiments in this study +# ZScores_Interaction.csv +for (m in 1:length(zscores_file)) { #zscores_file <- paste(Wstudy,"/",expName[m],'/ZScores/ZScores_Interaction.csv',sep="") #ArgsScore[1] - X <- read.csv(file = zscores_file[m],stringsAsFactors=FALSE,header = TRUE) + X <- read.csv(file = zscores_file[m], stringsAsFactors = FALSE, header = TRUE) - if(colnames(X)[1] == "OrfRep"){ + if (colnames(X)[1] == "OrfRep") { colnames(X)[1] <- "ORF" } #Terms is the GO term list - Terms <- read.delim(file = sgd_terms_file,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition")) + Terms <- read.delim(file = sgd_terms_file, header = FALSE, quote = "", + col.names = c("GO_ID", "GO_Term", "GO_Aspect", "GO_Term_Definition")) #all ORFs associated with GO term GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS) #Gene_Association is the gene association to GO term file - Gene_Association <- read.delim(sgd_features_file,skip=8,header=FALSE,quote="",col.names = c("Database","Database_Object_ID","Database_Object_Symbol","NOT","GO_ID","Database_Reference","Evidence","With_or_From","Aspect","Database_Object_Name","Database_Object_Synonym","Database_Object_Type","taxon","Date","Assigned_By","OtherInfo","Empty")) + Gene_Association <- read.delim(sgd_features_file, skip = 8, header = FALSE, quote = "", + col.names = c("Database", "Database_Object_ID", "Database_Object_Symbol", "NOT", "GO_ID", + "Database_Reference", "Evidence", "With_or_From", "Aspect", "Database_Object_Name", + "Database_Object_Synonym", "Database_Object_Type", "taxon", "Date", "Assigned_By", "OtherInfo", "Empty" + ) + ) #Get the ORF names associated with each gene/GO term - Gene_Association$ORF <- str_split_fixed(as.character(Gene_Association$Database_Object_Synonym),"\\|",2)[,1] + Gene_Association$ORF <- str_split_fixed(as.character(Gene_Association$Database_Object_Synonym), "\\|", 2)[, 1] #Get the numeric GO ID for matching - Gene_Association$GO_ID_Numeric <- as.integer(str_split_fixed(as.character(Gene_Association$GO_ID),"\\:",2)[,2]) - #get all unique GO terms + Gene_Association$GO_ID_Numeric <- as.integer(str_split_fixed(as.character(Gene_Association$GO_ID), "\\:", 2)[, 2]) + #Get all unique GO terms GO_Terms <- unique(Gene_Association$GO_ID) - #create a character vector with just the ColNames of the input file to store the scores for each GO term + #Create a character vector with just the ColNames of the input file to store the scores for each GO term Col_Names_X <- colnames(X) - #create a data_frame with header from input_file - GO_Term_Averages <- X[0,] - #fill table with NAs same length as number of GO terms - GO_Term_Averages[1:length(GO_Terms),] <- NA - #change the first and second col names to GO_ID and Term + #Create a data_frame with header from input_file + GO_Term_Averages <- X[0, ] + #Fill table with NAs same length as number of GO terms + GO_Term_Averages[1:length(GO_Terms), ] <- NA + #Change the first and second col names to GO_ID and Term colnames(GO_Term_Averages)[1] <- "GO_ID" colnames(GO_Term_Averages)[2] <- "Term" - #create new columns for Ontology, number genes (used to calculate the avg score), all possible genes in the GO term, and print genes/ORFs used + # Create new columns GO_Term_Averages$Ontology <- NA GO_Term_Averages$NumGenes <- NA GO_Term_Averages$AllPossibleGenes <- NA GO_Term_Averages$Genes <- NA GO_Term_Averages$ORFs <- NA - #create a data.frame for the standard deviation info - GO_Term_SD <- X[0,] - GO_Term_SD[1:length(GO_Terms),] <- NA + # Create a data.frame for the standard deviation info + GO_Term_SD <- X[0, ] + GO_Term_SD[1:length(GO_Terms), ] <- NA colnames(GO_Term_SD)[1] <- "GO_ID" colnames(GO_Term_SD)[2] <- "Term" - #Loop for each GO term to get an average L and K Z score - for(i in 1:length(GO_Terms)){ - #get the GO_Term + # Loop for each GO term to get an average L and K Z score + for (i in 1:length(GO_Terms)) { + # Get the GO_Term ID <- GO_Terms[i] - #Get data.frame for all genes associated to the GO Term - ID_AllGenes <- Gene_Association[Gene_Association$GO_ID == ID,] - #get a vector of just the gene names + # Get data.frame for all genes associated to the GO Term + ID_AllGenes <- Gene_Association[Gene_Association$GO_ID == ID, ] + # Get a vector of just the gene names ID_AllGenes_vector <- as.vector(GO2ALLORFs[as.character(ID)][[1]]) - if(length(unique(ID_AllGenes_vector)) > 4000){ + if (length(unique(ID_AllGenes_vector)) > 4000) { next() } - #get the GO term character description where numeric Terms ID matches GO_Term's ID - GO_Description_Term <- as.character(Terms[Terms$GO_ID %in% ID_AllGenes$GO_ID_Numeric,]$GO_Term[1]) - #get the Z scores for all genes in the GO_ID - Zscores_For_ID <- X[X$ORF %in% ID_AllGenes_vector,] - #get the Gene names and ORFs for the term - GO_Term_Averages$Genes[i] <- paste(unique(Zscores_For_ID$Gene),collapse=" | ") - GO_Term_Averages$ORFs[i] <- paste(unique(Zscores_For_ID$ORF),collapse=" | ") + # Get the GO term character description where numeric Terms ID matches GO_Term's ID + GO_Description_Term <- as.character(Terms[Terms$GO_ID %in% ID_AllGenes$GO_ID_Numeric, ]$GO_Term[1]) + # Get the Z scores for all genes in the GO_ID + Zscores_For_ID <- X[X$ORF %in% ID_AllGenes_vector, ] + # Get the Gene names and ORFs for the term + GO_Term_Averages$Genes[i] <- paste(unique(Zscores_For_ID$Gene), collapse = " | ") + GO_Term_Averages$ORFs[i] <- paste(unique(Zscores_For_ID$ORF), collapse = " | ") - #dataframe to report the averages for a GO term - #get the GO ID + # Dataframe to report the averages for a GO term + # Get the GO ID GO_Term_Averages$GO_ID[i] <- as.character(ID) - #get the term name + # Get the term name GO_Term_Averages$Term[i] <- GO_Description_Term - #get total number of genes annotated to the Term that we have in our library + # Get total number of genes annotated to the Term that we have in our library GO_Term_Averages$NumGenes[i] <- length(unique(Zscores_For_ID$ORF)) - #get total number of genes annotated to the Term in SGD + # Get total number of genes annotated to the Term in SGD GO_Term_Averages$AllPossibleGenes[i] <- length(unique(ID_AllGenes_vector)) - #get the ontology of the term + # Get the ontology of the term GO_Term_Averages$Ontology[i] <- as.character(ID_AllGenes$Aspect[1]) - #calculate the average score for every column - for(j in 3:length(X[1,])){ - GO_Term_Averages[i,j] <- mean(Zscores_For_ID[,j],na.rm = TRUE) - #GO_Scores <- colMeans(Zscores_For_ID[,3:length(X[1,])]) + # Calculate the average score for every column + for (j in 3:length(X[1, ])) { + GO_Term_Averages[i, j] <- mean(Zscores_For_ID[, j], na.rm = TRUE) + # GO_Scores <- colMeans(Zscores_For_ID[,3:length(X[1,])]) } - #also calculate same values for the SD + # Also calculate same values for the SD GO_Term_SD$GO_ID[i] <- as.character(ID) - #get the term name + # Get the term name GO_Term_SD$Term[i] <- GO_Description_Term - #calculate column scores for SD - for(j in 3:length(X[1,])){ - GO_Term_SD[i,j] <- sd(Zscores_For_ID[,j],na.rm = TRUE) - #GO_Scores <- colMeans(Zscores_For_ID[,3:length(X[1,])]) + # Calculate column scores for SD + for (j in 3:length(X[1, ])) { + GO_Term_SD[i, j] <- sd(Zscores_For_ID[, j], na.rm = TRUE) + # GO_Scores <- colMeans(Zscores_For_ID[,3:length(X[1,])]) } } - #add either _Avg or _SD depending on if the calculated score is an average or SD - colnames(GO_Term_Averages) <- paste(colnames(GO_Term_Averages),"Avg", sep = "_") - colnames(GO_Term_SD) <- paste(colnames(GO_Term_SD),"SD", sep = "_") - #combine the averages with the SDs to make one big data.frame - X2 <- cbind(GO_Term_Averages,GO_Term_SD) - #test[ , order(names(test))] - X2 <- X2[,order(names(X2))] - X2 <- X2[!is.na(X2$Z_lm_L_Avg),] - #create output file - write.csv(X2,file=paste(output_dir,"/",expName[m],"/Average_GOTerms_All.csv",sep=""),row.names=FALSE) - #remove NAs - X3 <- X2[!is.na(X2$Z_lm_L_Avg),] - #identify redundant GO terms + # Add either _Avg or _SD depending on if the calculated score is an average or SD + colnames(GO_Term_Averages) <- paste(colnames(GO_Term_Averages), "Avg", sep = "_") + colnames(GO_Term_SD) <- paste(colnames(GO_Term_SD), "SD", sep = "_") + # Combine the averages with the SDs to make one big data.frame + X2 <- cbind(GO_Term_Averages, GO_Term_SD) + # Test[ , order(names(test))] + X2 <- X2[, order(names(X2))] + X2 <- X2[!is.na(X2$Z_lm_L_Avg), ] + # Create output file + write.csv(X2, file.path(output_dir, expName[m], "Average_GOTerms_All.csv"), row.names = FALSE) + # Remove NAs + X3 <- X2[!is.na(X2$Z_lm_L_Avg), ] - for(i in 1:length(X3[,1])){ - #loop through each GO term - get term + # Identify redundant GO terms + for (i in 1:length(X3[, 1])) { + # Loop through each GO term - get term GO_term_ID <- as.character(X3$GO_ID_Avg[i]) - #get term in the X3 - X3_Temp <- X3[X3$GO_ID_Avg == GO_term_ID,] - #get anywhere that has the same number K_Avg value - X3_Temp2 <- X3[X3$Z_lm_K_Avg %in% X3_Temp,] - if(length(X3_Temp2[,1]) > 1){ - if(length(unique(X3_Temp2$Genes_Avg)) == 1){ - X3_Temp2 <- X3_Temp2[1,] + # Get term in the X3 + X3_Temp <- X3[X3$GO_ID_Avg == GO_term_ID, ] + # Get anywhere that has the same number K_Avg value + X3_Temp2 <- X3[X3$Z_lm_K_Avg %in% X3_Temp, ] + if (length(X3_Temp2[, 1]) > 1) { + if (length(unique(X3_Temp2$Genes_Avg)) == 1) { + X3_Temp2 <- X3_Temp2[1, ] } } - if(i == 1){ + if (i == 1) { Y <- X3_Temp2 } - if(i > 1){ - Y <- rbind(Y,X3_Temp2) + if (i > 1) { + Y <- rbind(Y, X3_Temp2) } } - Y1 <- unique(Y) - - write.csv(Y1,file=paste(output_dir,"/",exp_name,"/Average_GOTerms_All_NonRedundantTerms.csv",sep=""),row.names = FALSE) - - Y2 <- Y1[Y1$Z_lm_L_Avg >= 2 | Y1$Z_lm_L_Avg <= -2,] - Y2 <- Y2[!is.na(Y2$Z_lm_L_Avg),] - write.csv(Y2,file=paste(output_dir,"/",exp_name,"/Average_GOTerms_NonRedundantTerms_Above2SD_L.csv",sep=""),row.names = FALSE) - - Y3 <- Y2[Y2$NumGenes_Avg > 2,] - write.csv(Y3,file=paste(output_dir,"/",exp_name,"/Average_GOTerms_NonRedundantTerms_Above2SD_L_Above2Genes.csv",sep=""),row.names = FALSE) - - Y4 <- Y1[Y1$Z_lm_K_Avg >= 2 | Y1$Z_lm_K_Avg <= -2,] - Y4 <- Y4[!is.na(Y4$Z_lm_K_Avg),] - write.csv(Y4,file=paste(output_dir,"/",exp_name,"/Average_GOTerms_NonRedundantTerms_Above2SD_K.csv",sep=""),row.names = FALSE) - - Y5 <- Y4[Y4$NumGenes_Avg > 2,] - write.csv(Y5,file=paste(output_dir,"/",exp_name,"/Average_GOTerms_NonRedundantTerms_Above2SD_K_Above2Genes.csv",sep=""),row.names = FALSE) - - #End of 'for loop' -} + Y1 <- unique(Y) + write.csv(Y1, file.path(output_dir, exp_name, "Average_GOTerms_All_NonRedundantTerms.csv"), row.names = FALSE) + Y2 <- Y1[Y1$Z_lm_L_Avg >= 2 | Y1$Z_lm_L_Avg <= -2, ] + Y2 <- Y2[!is.na(Y2$Z_lm_L_Avg), ] + write.csv(Y2, file.path(output_dir, exp_name, "Average_GOTerms_NonRedundantTerms_Above2SD_L.csv"), row.names = FALSE) + Y3 <- Y2[Y2$NumGenes_Avg > 2, ] + write.csv(Y3, file.path(output_dir, exp_name, "Average_GOTerms_NonRedundantTerms_Above2SD_L_Above2Genes.csv"), row.names = FALSE) + Y4 <- Y1[Y1$Z_lm_K_Avg >= 2 | Y1$Z_lm_K_Avg <= -2, ] + Y4 <- Y4[!is.na(Y4$Z_lm_K_Avg), ] + write.csv(Y4, file.path(output_dir, exp_name, "Average_GOTerms_NonRedundantTerms_Above2SD_K.csv"), row.names = FALSE) + Y5 <- Y4[Y4$NumGenes_Avg > 2, ] + write.csv(Y5, file.path(output_dir, exp_name, "Average_GOTerms_NonRedundantTerms_Above2SD_K_Above2Genes.csv"), row.names = FALSE) +} diff --git a/workflow/apps/r/interactions.R b/workflow/apps/r/interactions.R index 7b076c32..05fea564 100644 --- a/workflow/apps/r/interactions.R +++ b/workflow/apps/r/interactions.R @@ -289,11 +289,11 @@ Plate_Analysis_L <- ggplot(X, aes(Scan, l, color = as.factor(Conc_Num))) + geom_point(shape = 3, size = 0.2) + stat_summary( - fun = mean, - fun.min = function(x) mean(x) - sd(x), + fun = mean, + fun.min = function(x) mean(x) - sd(x), fun.max = function(x) mean(x) + sd(x), geom = "errorbar" - ) + + ) + stat_summary(fun = mean, geom = "point", size = 0.6) + ggtitle("Plate analysis by Drug Conc for L before quality control") + theme_publication() @@ -989,7 +989,7 @@ for (s in Background_Strains) { X1_SD_outside_2SD_K <- max(X_stats_BY_L_outside_2SD_K$sd) # X1_SD_outside_2SD_K <- X[X$l %in% X1_SD_within_2SD_K$l, ] - Outside_2SD_K_L_vs_K <- + Outside_2SD_K_L_vs_K <- ggplot(X_outside_2SD_K, aes(l, K, color = as.factor(Conc_Num))) + geom_point(aes(ORF = ORF, Gene = Gene, Delta_Backgrd = Delta_Backgrd), shape = 3) + ggtitle("Raw L vs K for strains falling outside 2SD of the K mean at each conc") + diff --git a/workflow/apps/r/joinInteractExps.R b/workflow/apps/r/joinInteractExps.R index 48dfa3ec..3b685364 100644 --- a/workflow/apps/r/joinInteractExps.R +++ b/workflow/apps/r/joinInteractExps.R @@ -9,7 +9,7 @@ args <- commandArgs(TRUE) # Set output dir if (length(args) >= 1) { - outDir <- args[1] + outDir <- file.path(args[1]) } else { outDir <- "./" # for legacy workflow } @@ -20,11 +20,11 @@ if (length(args) >= 2) { } else { sd <- 2 # default value } -print(paste("SD=",sd)) +print(paste("SD=", sd)) # Set studyInfo file if (length(args) >= 3) { - studyInfo <- args[3] + studyInfo <- file.path(args[3]) } else { studyInfo <- "../Code/StudyInfo.csv" # for legacy workflow } @@ -32,7 +32,7 @@ if (length(args) >= 3) { studies <- args[3:length(args)] inputFiles <- c() for (study in 1:length(studies)) { - zsFile <- file.path(study, 'zscores', 'zscores_interaction.csv') + zsFile <- file.path(study, "zscores", "zscores_interaction.csv") if (file.exists(zsFile)) { inputFiles[study] <- zsFile } @@ -43,186 +43,191 @@ print(length(inputFiles)) # TODO this is better handled in a loop in case you want to compare more experiments? # The input is already designed for this # Read in the files for your experiment and -# Join the two files at a time as a function of how many inputFile, list the larger file first ? in this example X2 has the larger number of genes. +# Join the two files at a time as a function of how many inputFile +# list the larger file first ? in this example X2 has the larger number of genes # If X1 has a larger number of genes, switch the order of X1 and X2 -if(length(inputFiles)==2) { - X1 <- read.csv(file=inputFiles[1],stringsAsFactors=FALSE) - X2 <- read.csv(file=inputFiles[2],stringsAsFactors=FALSE) - X <- join(X1,X2,by="OrfRep") - OBH=X[,order(colnames(X))] #OrderByHeader - headSel=select(OBH, contains('OrfRep'), matches('Gene'), contains('Z_lm_K'), contains('Z_Shift_K'),contains('Z_lm_L'), contains('Z_Shift_L')) - headSel=select(headSel, -'Gene.1') #remove 'Gene.1 column - headSel2=select(OBH, contains('OrfRep'), matches('Gene')) #Frame for interleaving Z_lm with Shift colums - headSel2=select(headSel2, -'Gene.1') #remove 'Gene.1 column #Frame for interleaving Z_lm with Shift colums +if (length(inputFiles) == 2) { + X1 <- read.csv(file = inputFiles[1], stringsAsFactors = FALSE) + X2 <- read.csv(file = inputFiles[2], stringsAsFactors = FALSE) + X <- join(X1, X2, by = "OrfRep") + OBH <- X[, order(colnames(X))] # OrderByHeader + headSel <- select(OBH, contains("OrfRep"), matches("Gene"), + contains("Z_lm_K"), contains("Z_Shift_K"), contains("Z_lm_L"), contains("Z_Shift_L")) + headSel <- select(headSel, -"Gene.1") #remove "Gene.1 column + headSel2 <- select(OBH, contains("OrfRep"), matches("Gene")) #Frame for interleaving Z_lm with Shift colums + headSel2 <- select(headSel2, -"Gene.1") #remove "Gene.1 column #Frame for interleaving Z_lm with Shift colums +} else if (length(inputFiles) == 3) { + X1 <- read.csv(file = inputFiles[1], stringsAsFactors = FALSE) #exp1File,stringsAsFactors = FALSE) + X2 <- read.csv(file = inputFiles[2], stringsAsFactors = FALSE) #exp2File,stringsAsFactors = FALSE) + X3 <- read.csv(file = inputFiles[3], stringsAsFactors = FALSE) #exp3File,stringsAsFactors = FALSE) + X <- join(X1, X2, by = "OrfRep") + X <- join(X, X3, by = "OrfRep") + OBH <- X[, order(colnames(X))] #OrderByHeader + headSel <- select(OBH, contains("OrfRep"), matches("Gene"), + contains("Z_lm_K"), contains("Z_Shift_K"), contains("Z_lm_L"), contains("Z_Shift_L")) + headSel <- select(headSel, -"Gene.1", -"Gene.2") + headSel2 <- select(OBH, contains("OrfRep"), matches("Gene")) + headSel2 <- select(headSel2, -"Gene.1", -"Gene.2") -}else if(length(inputFiles)==3){ - X1 <- read.csv(file=inputFiles[1],stringsAsFactors=FALSE) #exp1File,stringsAsFactors=FALSE) - X2 <- read.csv(file=inputFiles[2],stringsAsFactors=FALSE) #exp2File,stringsAsFactors=FALSE) - X3 <- read.csv(file=inputFiles[3],stringsAsFactors=FALSE) #exp3File,stringsAsFactors=FALSE) - X <- join(X1,X2,by="OrfRep") - X <- join(X,X3,by="OrfRep") - OBH=X[,order(colnames(X))] #OrderByHeader - headSel=select(OBH, contains('OrfRep'), matches('Gene'), contains('Z_lm_K'), contains('Z_Shift_K'),contains('Z_lm_L'), contains('Z_Shift_L')) - headSel=select(headSel, -'Gene.1',-'Gene.2') - headSel2=select(OBH, contains('OrfRep'), matches('Gene')) - headSel2=select(headSel2, -'Gene.1',-'Gene.2') - -}else if(length(inputFiles)==4){ - X1 <- read.csv(file=inputFiles[1],stringsAsFactors=FALSE) #exp1File,stringsAsFactors=FALSE) - X2 <- read.csv(file=inputFiles[2],stringsAsFactors=FALSE) #exp2File,stringsAsFactors=FALSE) - X3 <- read.csv(file=inputFiles[3],stringsAsFactors=FALSE) #exp3File,stringsAsFactors=FALSE) - X4 <- read.csv(file=inputFiles[4],stringsAsFactors=FALSE) #exp4File,stringsAsFactors=FALSE) - X <- join(X1,X2,by="OrfRep") - X <- join(X,X3,by="OrfRep") - X <- join(X,X4,by="OrfRep") - OBH=X[,order(colnames(X))] #OrderByHeader - headSel=select(OBH, contains('OrfRep'), matches('Gene'), contains('Z_lm_K'), contains('Z_Shift_K'),contains('Z_lm_L'), contains('Z_Shift_L')) - headSel=select(headSel, -'Gene.1',-'Gene.2',-'Gene.3') - headSel2=select(OBH, contains('OrfRep'), matches('Gene')) - headSel2=select(headSel2, -'Gene.1',-'Gene.2',-'Gene.3') +} else if (length(inputFiles) == 4) { + X1 <- read.csv(file = inputFiles[1], stringsAsFactors = FALSE) #exp1File,stringsAsFactors = FALSE) + X2 <- read.csv(file = inputFiles[2], stringsAsFactors = FALSE) #exp2File,stringsAsFactors = FALSE) + X3 <- read.csv(file = inputFiles[3], stringsAsFactors = FALSE) #exp3File,stringsAsFactors = FALSE) + X4 <- read.csv(file = inputFiles[4], stringsAsFactors = FALSE) #exp4File,stringsAsFactors = FALSE) + X <- join(X1, X2, by = "OrfRep") + X <- join(X, X3, by = "OrfRep") + X <- join(X, X4, by = "OrfRep") + OBH <- X[, order(colnames(X))] #OrderByHeader + headSel <- select(OBH, contains("OrfRep"), matches("Gene"), + contains("Z_lm_K"), contains("Z_Shift_K"), contains("Z_lm_L"), contains("Z_Shift_L")) + headSel <- select(headSel, -"Gene.1", -"Gene.2", -"Gene.3") + headSel2 <- select(OBH, contains("OrfRep"), matches("Gene")) + headSel2 <- select(headSel2, -"Gene.1", -"Gene.2", -"Gene.3") } -#headSel$contains('Z_Shift') %>% replace_na(0.001) -headers<-colnames(headSel) -i=0 -for(i in 1:length(headers)){ - - if(grepl("Shift",headers[i])) { - headSel[headers[i]][is.na(headSel[headers[i]])]=0.001 +# headSel$contains("Z_Shift") %>% replace_na(0.001) +headers <- colnames(headSel) +i <- 0 +for (i in 1:length(headers)) { + if (grepl("Shift", headers[i])) { + headSel[headers[i]][is.na(headSel[headers[i]])] <- 0.001 } - if(grepl("Z_lm_",headers[i])) { - headSel[headers[i]][is.na(headSel[headers[i]])]=0.0001 + if (grepl("Z_lm_", headers[i])) { + headSel[headers[i]][is.na(headSel[headers[i]])] <- 0.0001 } } -#2SD option code to exclude Z_lm values less than 2 standard Deviations - -REMcRdy=select(headSel, contains('OrfRep'), matches('Gene'), contains('Z_lm_')) -shiftOnly=select(headSel, contains('OrfRep'), matches('Gene'), contains('Z_Shift')) +# 2SD option code to exclude Z_lm values less than 2 standard Deviations +REMcRdy <- select(headSel, contains("OrfRep"), matches("Gene"), contains("Z_lm_")) +shiftOnly <- select(headSel, contains("OrfRep"), matches("Gene"), contains("Z_Shift")) # Code to replace the numeric (.1 .2 .3) headers with experiment names from StudyInfo.txt -Labels <- read.csv(file="../Code/StudyInfo.csv",stringsAsFactors=FALSE,sep=",") +Labels <- read.csv(file = "../Code/StudyInfo.csv", stringsAsFactors = FALSE, sep = ",") # Using Text search grepl to relabel headers -REMcRdyHdr=colnames(REMcRdy) -REMcRdyLabels='asdf' -shftHdr=colnames(shiftOnly) -shiftLabels='asdf' -shiftLabels[1:2]<-shftHdr[1:2] -REMcRdyLabels[1:2]<-REMcRdyHdr[1:2] +REMcRdyHdr <- colnames(REMcRdy) +REMcRdyLabels <- "asdf" +shftHdr <- colnames(shiftOnly) +shiftLabels <- "asdf" +shiftLabels[1:2] <- shftHdr[1:2] +REMcRdyLabels[1:2] <- REMcRdyHdr[1:2] -for(i in 3:(length(shftHdr))){ - if(i==3){ - shiftLabels[3]<-paste0(Labels[1,2],".",shftHdr[3]) - REMcRdyLabels[3]<-paste0(Labels[1,2],".",REMcRdyHdr[3]) } - if(i==5){ - shiftLabels[5]<-paste0(Labels[1,2],".",shftHdr[5]) - REMcRdyLabels[5]<-paste0(Labels[1,2],".",REMcRdyHdr[5]) +for (i in 3:(length(shftHdr))) { + if (i == 3) { + shiftLabels[3] <- paste0(Labels[1, 2], ".", shftHdr[3]) + REMcRdyLabels[3] <- paste0(Labels[1, 2], ".", REMcRdyHdr[3]) } - if(i==7){ - shiftLabels[7]<-paste0(Labels[1,2],".",shftHdr[7]) - REMcRdyLabels[7]<-paste0(Labels[1,2],".",REMcRdyHdr[7]) + if (i == 5) { + shiftLabels[5] <- paste0(Labels[1, 2], ".", shftHdr[5]) + REMcRdyLabels[5] <- paste0(Labels[1, 2], ".", REMcRdyHdr[5]) + } + if (i == 7) { + shiftLabels[7] <- paste0(Labels[1, 2], ".", shftHdr[7]) + REMcRdyLabels[7] <- paste0(Labels[1, 2], ".", REMcRdyHdr[7]) + } + if (grepl(".1", shftHdr[i], fixed = true)) { + shiftLabels[i] <- paste0(Labels[2, 2], ".", shftHdr[i]) + REMcRdyLabels[i] <- paste0(Labels[2, 2], ".", REMcRdyHdr[i]) + } + if (grepl(".2", shftHdr[i], fixed = true)) { + shiftLabels[i] < -paste0(Labels[3, 2], ".", shftHdr[i]) + REMcRdyLabels[i] <- paste0(Labels[3, 2], ".", REMcRdyHdr[i]) + } + if (grepl(".3", shftHdr[i], fixed = true)) { + shiftLabels[i] <- paste0(Labels[4, 2], ".", shftHdr[i]) + REMcRdyLabels[i] <- paste0(Labels[4, 2], ".", REMcRdyHdr[i]) } - if(grepl(".1",shftHdr[i],fixed=true)){ - shiftLabels[i]<-paste0(Labels[2,2],".",shftHdr[i]) - REMcRdyLabels[i]<-paste0(Labels[2,2],".",REMcRdyHdr[i])} - if (grepl(".2",shftHdr[i],fixed=true)){ - shiftLabels[i]<-paste0(Labels[3,2],".",shftHdr[i]) - REMcRdyLabels[i]<-paste0(Labels[3,2],".",REMcRdyHdr[i])} - if(grepl(".3",shftHdr[i],fixed=true)){ - shiftLabels[i]<-paste0(Labels[4,2],".",shftHdr[i]) - REMcRdyLabels[i]<-paste0(Labels[4,2],".",REMcRdyHdr[i])} } -for(i in 3:(length(REMcRdyLabels))){ - j=as.integer(i) - REMcRdyLabels[j]<- gsub("[.]", "_", REMcRdyLabels[j]) - shiftLabels[j]<- gsub("[.]", "_", shiftLabels[j]) +for (i in 3:(length(REMcRdyLabels))) { + j <- as.integer(i) + REMcRdyLabels[j] <- gsub("[.]", "_", REMcRdyLabels[j]) + shiftLabels[j] <- gsub("[.]", "_", shiftLabels[j]) } -colnames(shiftOnly)<- shiftLabels -colnames(REMcRdy)<- REMcRdyLabels +colnames(shiftOnly) <- shiftLabels +colnames(REMcRdy) <- REMcRdyLabels -combI=headSel2 #Starting Template orf, Genename columns +combI <- headSel2 # starting Template orf, Genename columns # headersRemc<-colnames(REMcRdy) # Reorder columns to produce an interleaved set of Z_lm and Shift data for all the cpps. -for(i in 3:length(colnames(REMcRdy))){ - combI=cbind.data.frame(combI, shiftOnly[i]) - combI=cbind.data.frame(combI, REMcRdy[i]) +for (i in 3:length(colnames(REMcRdy))) { + combI <- cbind.data.frame(combI, shiftOnly[i]) + combI <- cbind.data.frame(combI, REMcRdy[i]) } -Vec1=NA -Vec2=NA -Vec3=NA -Vec4=NA -Vec5=NA -Vec6=NA -Vec7=NA -Vec8=NA +Vec1 <- NA +Vec2 <- NA +Vec3 <- NA +Vec4 <- NA +Vec5 <- NA +Vec6 <- NA +Vec7 <- NA +Vec8 <- NA -if(length(REMcRdy)==6){ - Vec1=abs(REMcRdy[,3])>=std - Vec2=abs(REMcRdy[,4])>=std - Vec3=abs(REMcRdy[,5])>=std - Vec4=abs(REMcRdy[,6])>=std - bolVec=Vec1 | Vec2 |Vec3 |Vec4 - REMcRdyGT2=REMcRdy[bolVec,1:2] - REMcRdyGT2[ ,3:6]=REMcRdy[bolVec,3:6] - shiftOnlyGT2=shiftOnly[bolVec,1:2] - shiftOnlyGT2[ ,3:6]=shiftOnly[bolVec,3:6] +if (length(REMcRdy) == 6) { + Vec1 <- abs(REMcRdy[, 3]) >= std + Vec2 <- abs(REMcRdy[, 4]) >= std + Vec3 <- abs(REMcRdy[, 5]) >= std + Vec4 <- abs(REMcRdy[, 6]) >= std + bolVec <- Vec1 | Vec2 | Vec3 | Vec4 + REMcRdyGT2 <- REMcRdy[bolVec, 1:2] + REMcRdyGT2[, 3:6] <- REMcRdy[bolVec, 3:6] + shiftOnlyGT2 <- shiftOnly[bolVec, 1:2] + shiftOnlyGT2[, 3:6] <- shiftOnly[bolVec, 3:6] } -if(length(REMcRdy)==8){ - Vec1=abs(REMcRdy[,3])>=std - Vec2=abs(REMcRdy[,4])>=std - Vec3=abs(REMcRdy[,5])>=std - Vec4=abs(REMcRdy[,6])>=std - Vec5=abs(REMcRdy[,7])>=std - Vec6=abs(REMcRdy[,8])>=std - bolVec=Vec1 | Vec2 |Vec3 | Vec4 |Vec5 |Vec6 - REMcRdyGT2=REMcRdy[bolVec,1:2] - REMcRdyGT2[ ,3:8]=REMcRdy[bolVec,3:8] - shiftOnlyGT2=shiftOnly[bolVec,1:2] - shiftOnlyGT2[ ,3:8]=shiftOnly[bolVec,3:8] +if (length(REMcRdy) == 8) { + Vec1 <- abs(REMcRdy[, 3]) >= std + Vec2 <- abs(REMcRdy[, 4]) >= std + Vec3 <- abs(REMcRdy[, 5]) >= std + Vec4 <- abs(REMcRdy[, 6]) >= std + Vec5 <- abs(REMcRdy[, 7]) >= std + Vec6 <- abs(REMcRdy[, 8]) >= std + bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6 + REMcRdyGT2 <- REMcRdy[bolVec, 1:2] + REMcRdyGT2[, 3:8] <- REMcRdy[bolVec, 3:8] + shiftOnlyGT2 <- shiftOnly[bolVec, 1:2] + shiftOnlyGT2[, 3:8] <- shiftOnly[bolVec, 3:8] } -if(length(REMcRdy)==10){ - Vec1=abs(REMcRdy[,3])>=std - Vec2=abs(REMcRdy[,4])>=std - Vec3=abs(REMcRdy[,5])>=std - Vec4=abs(REMcRdy[,6])>=std - Vec5=abs(REMcRdy[,7])>=std - Vec6=abs(REMcRdy[,8])>=std - Vec7=abs(REMcRdy[,9])>=std - Vec8=abs(REMcRdy[,10])>=std - bolVec=Vec1 | Vec2 |Vec3 |Vec4|Vec5|Vec6|Vec7|Vec8 - REMcRdyGT2=REMcRdy[bolVec,1:2] - REMcRdyGT2[ ,3:10]=REMcRdy[bolVec,3:10] - shiftOnlyGT2=shiftOnly[bolVec,1:2] - shiftOnlyGT2[ ,3:10]=shiftOnly[bolVec,3:10] +if (length(REMcRdy) == 10) { + Vec1 <- abs(REMcRdy[, 3]) >= std + Vec2 <- abs(REMcRdy[, 4]) >= std + Vec3 <- abs(REMcRdy[, 5]) >= std + Vec4 <- abs(REMcRdy[, 6]) >= std + Vec5 <- abs(REMcRdy[, 7]) >= std + Vec6 <- abs(REMcRdy[, 8]) >= std + Vec7 <- abs(REMcRdy[, 9]) >= std + Vec8 <- abs(REMcRdy[, 10]) >= std + bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6 | Vec7 | Vec8 + REMcRdyGT2 <- REMcRdy[bolVec, 1:2] + REMcRdyGT2[, 3:10] <- REMcRdy[bolVec, 3:10] + shiftOnlyGT2 <- shiftOnly[bolVec, 1:2] + shiftOnlyGT2[, 3:10] <- shiftOnly[bolVec, 3:10] } -if(std!=0){ - REMcRdy=REMcRdyGT2 # [,2:length(REMcRdyGT2)] - shiftOnly=shiftOnlyGT2 # [,2:length(shiftOnlyGT2)] +if (std != 0) { + REMcRdy <- REMcRdyGT2 # [,2:length(REMcRdyGT2)] + shiftOnly <- shiftOnlyGT2 # [,2:length(shiftOnlyGT2)] } -if(std==0){ - REMcRdy=REMcRdy # [,2:length(REMcRdy)] - shiftOnly=shiftOnly # [,2:length(shiftOnly)] +if (std == 0) { + REMcRdy <- REMcRdy # [,2:length(REMcRdy)] + shiftOnly <- shiftOnly # [,2:length(shiftOnly)] } -# R places hidden "" around the header names. The following +# R places hidden "" around the header names. The following # is intended to remove those quote so that the "" do not blow up the Java REMc. # Use ,quote=F in the write.csv statement to fix R output file. -#write.csv(combI,file=file.path(outDir,"CombinedKLzscores.csv"),row.names=FALSE) -write.csv(REMcRdy,file=file.path(outDir,"REMcRdy_lm_only.csv"),row.names=FALSE, quote=F) -write.csv(shiftOnly,file=file.path(outDir,"Shift_only.csv"),row.names=FALSE, quote=F) -#LabelStd <- read.table(file="./parameters.csv",stringsAsFactors=FALSE,sep=",") +# write.csv(combI,file.path(outDir,"CombinedKLzscores.csv"), row.names = FALSE) +write.csv(REMcRdy, file.path(outDir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE) +write.csv(shiftOnly, file.path(outDir, "Shift_only.csv"), row.names = FALSE, quote = FALSE) +#LabelStd <- read.table(file="./parameters.csv",stringsAsFactors = FALSE,sep = ",") -LabelStd<-read.csv(file=studyInfo,stringsAsFactors=FALSE) +LabelStd <- read.csv(file = studyInfo, stringsAsFactors = FALSE) print(std) -LabelStd[,4]=as.numeric(std) -write.csv(LabelStd,file=file.path(outDir,"parameters.csv"),row.names=FALSE) -write.csv(LabelStd,file=studyInfo,row.names=FALSE) +LabelStd[, 4] <- as.numeric(std) +write.csv(LabelStd, file = file.path(outDir, "parameters.csv"), row.names = FALSE) +write.csv(LabelStd, file = studyInfo, row.names = FALSE) diff --git a/workflow/qhtcp-workflow b/workflow/qhtcp-workflow index c5cc5186..bb80c188 100755 --- a/workflow/qhtcp-workflow +++ b/workflow/qhtcp-workflow @@ -1620,7 +1620,12 @@ gta() { for combo in "${study_combos[@]}"; do # Split on comma and assign to array IFS=',' read -ra studies <<< "$combo" - r_gta_pairwiselk "${studies[0]}" "${studies[1]}" "$STUDY_INFO_FILE" "$gta_out_dir" + r_gta_pairwiselk \ + "${studies[0]}" \ + "${studies[1]}" \ + "$STUDY_INFO_FILE" \ + "Average_GOTerms_All.csv" \ + "$gta_out_dir" done # All studies @@ -1660,17 +1665,18 @@ wrapper r_gta # # TODO # -# * Is GTAtemplate.R actually a template? -# * Do we need to allow user customization? +# * Is GTAtemplate.R actually a template? +# * Do we need to allow user customization? # -# Files +# INPUT # -# * [gene_association.sgd](https://downloads.yeastgenome.org/curation/chromosomal_feature/gene_association.sgd) -# * go_terms.tab +# * [gene_association.sgd](https://downloads.yeastgenome.org/curation/chromosomal_feature/gene_association.sgd) +# * go_terms.tab # -# Output +# OUTPUT +# +# * Average_GOTerms_All.csv # -# * # # @arg $1 string Exp# name # @arg $2 string ZScores_Interaction.csv file @@ -1683,7 +1689,7 @@ r_gta() { cat <<-EOF EOF - script="$APPS_DIR/r/GTAtemplate.R" + script="$APPS_DIR/r/gtaTemplate.R" [[ -d $5 ]] || mkdir -p "$5" debug "$RSCRIPT $script $*" "$RSCRIPT" "$script" \ @@ -1701,16 +1707,18 @@ wrapper r_gta_pairwiselk # # TODO # -# * Should move directory creation from PairwiseLK.R to gta module +# * Move directory creation from PairwiseLK.R to gta module +# * Needs better output filenames and directory organization +# * Needs more for looping to reduce verbosity # -# Files +# INPUT # -# * -# * +# * Average_GOTerms_All.csv +# * # # Output # -# * +# * # # This wrapper: # @@ -1724,7 +1732,7 @@ wrapper r_gta_pairwiselk # @arg $4 string output directory # r_gta_pairwiselk() { - debug "Running: ${FUNCNAME[0]}" + debug "Running: ${FUNCNAME[0]} $*" cat <<-EOF EOF @@ -1734,7 +1742,12 @@ r_gta_pairwiselk() { [[ -d $4 ]] || mkdir -p "$4" debug "$RSCRIPT $script $*" - "$RSCRIPT" "$script" "$@" + "$RSCRIPT" "$script" \ + "$1" \ + "$2" \ + "$3" \ + "$4" \ + "${@:5}" } @@ -1842,6 +1855,10 @@ r_interactions() { wrapper r_join_interactions # @description JoinInteractExps3dev.R creates REMcRdy_lm_only.csv and Shift_only.csv # +# TODO +# * Needs more loops to reduce verbosity +# +# # Output # # * REMcRdy_lm_only.csv