123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810 |
- #!/usr/bin/env Rscript
- # Makes heat maps of multiple experiments
- #
- # Updated 240724 Bryan C Roessler to improve file operations and portability
- # I tried to leave as much logic intact as possible, just feeding in vars in a better way
- # NOTE: The script now has 7 required arguments and a variable number of input experiments
- # @arg $1 string StudyInfo.csv file
- # @arg $2 string gene_ontology_edit.obo file
- # @arg $3 string go_terms.tab file
- # @arg $4 string All_SGD_GOTerms_for_QHTCPtk.csv
- # @arg $5 string base directory
- # @arg $6 string output directory
- library("ontologyIndex")
- library("ggplot2")
- library("RColorBrewer")
- library("grid")
- library("ggthemes")
- # library("plotly")
- # library("htmlwidgets")
- library("extrafont")
- library("stringr")
- library("org.Sc.sgd.db")
- library("ggrepel")
- library("gplots")
- # Load arguments
- args <- commandArgs(TRUE)
- study_info_file <- args[1]
- ontology_file <- args[2]
- sgd_terms_tfile <- args[3]
- all_sgd_terms_csv <- args[4]
- base_dir <- args[5]
- output_dir <- args[6]
- study_nums <- args[7:length(args)]
- # Import standard tables used in Sean's code That should be copied to each ExpStudy
- labels <- read.csv(file = study_info_file, stringsAsFactors = FALSE)
- Ontology <- get_ontology(file = ontology_file, propagate_relationships = "is_a", extract_tags = "minimal")
- GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS) # all ORFs associated with GO term
- Terms <- read.delim(file = sgd_terms_tfile,
- header = FALSE,
- quote = "",
- col.names = c("GO_ID", "GO_Term", "GO_Aspect", "GO_Term_Definition")
- )
- XX3 <- read.csv(file = all_sgd_terms_csv, stringsAsFactors = FALSE, header = TRUE)
- XX3[, 1] <- paste0("GO:", formatC(XX3[, 1], width = 7, flag = "0"))
- XX3[, 2] <- gsub(pattern = " ", replacement = "_", x = XX3[, 2])
- XX3[, 2] <- gsub(pattern = "/", replacement = "_", x = XX3[, 2])
- # Load input files
- for (study_num in study_nums) {
- input_file <- file.path(base_dir, paste("Exp", study_num), zscores, "zscores_interaction.csv")
- if (file.exists(input_file)) {
- assign(paste(X, study_num), read.csv(file = input_file, stringsAsFactors = FALSE, header = TRUE))
- assign(paste(Name, study_num), labels[study_num, 2])
- }
- }
- for (study_num in study_nums) {
- eval(paste("function", study_num))
- }
- if (length(study_nums) > 0) {
- X1$ORF <- X1$OrfRep
- X1$ORF <- gsub("_1", "", x = X1$ORF)
- X1$ORF <- gsub("_2", "", x = X1$ORF)
- X1$ORF <- gsub("_3", "", x = X1$ORF)
- X1$ORF <- gsub("_4", "", x = X1$ORF)
-
- X1$Score_L <- "No Effect"
- try(X1[is.na(X1$Z_lm_L), ]$Score_L <- "No Growth")
- try(X1[!is.na(X1$Z_lm_L) & X1$Z_lm_L >= 2, ]$Score_L <- "Deletion Enhancer")
- try(X1[!is.na(X1$Z_lm_L) & X1$Z_lm_L <= -2, ]$Score_L <- "Deletion Suppressor")
-
- X1$Score_K <- "No Effect"
- try(X1[is.na(X1$Z_lm_K), ]$Score_K <- "No Growth")
- try(X1[!is.na(X1$Z_lm_K) & X1$Z_lm_K >= 2, ]$Score_K <- "Deletion Suppressor")
- try(X1[!is.na(X1$Z_lm_K) & X1$Z_lm_K <= -2, ]$Score_K <- "Deletion Enhancer")
-
- # Express the na data as 0.001 in X1 for K and L
- X1[is.na(X1$Z_lm_L), ]$Z_lm_L <- 0.001
- X1[is.na(X1$Z_lm_K), ]$Z_lm_K <- 0.001
- X1$Rank_L <- rank(X1$Z_lm_L)
- X1$Rank_K <- rank(X1$Z_lm_K)
- X1 <- X1[order(X1$OrfRep, decreasing = FALSE), ]
- colnames(X1) <- paste0(colnames(X1), "_X1")
- }
- if (length(study_nums) > 1) {
- X2$ORF <- X2$OrfRep
- X2$ORF <- gsub("_1", "", x = X2$ORF)
- X2$ORF <- gsub("_2", "", x = X2$ORF)
- X2$ORF <- gsub("_3", "", x = X2$ORF)
- X2$ORF <- gsub("_4", "", x = X2$ORF)
-
- X2$Score_L <- "No Effect"
- try(X2[is.na(X2$Z_lm_L), ]$Score_L <- "No Growth")
- try(X2[!is.na(X2$Z_lm_L) & X2$Z_lm_L >= 2, ]$Score_L <- "Deletion Enhancer")
- try(X2[!is.na(X2$Z_lm_L) & X2$Z_lm_L <= -2, ]$Score_L <- "Deletion Suppressor")
-
- X2$Score_K <- "No Effect"
- try(X2[is.na(X2$Z_lm_K), ]$Score_K <- "No Growth")
- try(X2[!is.na(X2$Z_lm_K) & X2$Z_lm_K >= 2, ]$Score_K <- "Deletion Suppressor")
- try(X2[!is.na(X2$Z_lm_K) & X2$Z_lm_K <= -2, ]$Score_K <- "Deletion Enhancer")
- #express the na data as 0.001 in X2
- X2[is.na(X2$Z_lm_L), ]$Z_lm_L <- 0.001
- X2[is.na(X2$Z_lm_K), ]$Z_lm_K <- 0.001
-
- X2$Rank_L <- rank(X2$Z_lm_L)
- X2$Rank_K <- rank(X2$Z_lm_K)
-
- X2 <- X2[order(X2$OrfRep, decreasing = FALSE), ]
- colnames(X2) <- paste0(colnames(X2), "_X2")
- X <- cbind(X1, X2)
- }
- if (length(study_nums) > 2) {
- X3$ORF <- X3$OrfRep
- X3$ORF <- gsub("_1", "", x = X3$ORF)
- X3$ORF <- gsub("_2", "", x = X3$ORF)
- X3$ORF <- gsub("_3", "", x = X3$ORF)
- X3$ORF <- gsub("_4", "", x = X3$ORF)
-
- X3$Score_L <- "No Effect"
- try(X3[is.na(X3$Z_lm_L), ]$Score_L <- "No Growth")
- try(X3[!is.na(X3$Z_lm_L) & X3$Z_lm_L >= 2, ]$Score_L <- "Deletion Enhancer")
- try(X3[!is.na(X3$Z_lm_L) & X3$Z_lm_L <= -2, ]$Score_L <- "Deletion Suppressor")
-
- X3$Score_K <- "No Effect"
- try(X3[is.na(X3$Z_lm_K), ]$Score_K <- "No Growth")
- try(X3[!is.na(X3$Z_lm_K) & X3$Z_lm_K >= 2, ]$Score_K <- "Deletion Suppressor")
- try(X3[!is.na(X3$Z_lm_K) & X3$Z_lm_K <= -2, ]$Score_K <- "Deletion Enhancer")
-
- # Express the na data as 0.001 in X3
- X3[is.na(X3$Z_lm_L), ]$Z_lm_L <- 0.001
- X3[is.na(X3$Z_lm_K), ]$Z_lm_K <- 0.001
- X3$Rank_L <- rank(X3$Z_lm_L)
- X3$Rank_K <- rank(X3$Z_lm_K)
- X3 <- X3[order(X3$OrfRep, decreasing = FALSE), ]
- colnames(X3) <- paste0(colnames(X3), "_X3")
- X <- cbind(X, X3)
- }
- if (length(study_nums) > 3) {
- X4$ORF <- X4$OrfRep
- X4$ORF <- gsub("_1", "", x = X4$ORF)
- X4$ORF <- gsub("_2", "", x = X4$ORF)
- X4$ORF <- gsub("_3", "", x = X4$ORF)
- X4$ORF <- gsub("_4", "", x = X4$ORF)
-
- X4$Score_L <- "No Effect"
- try(X4[is.na(X4$Z_lm_L), ]$Score_L <- "No Growth")
- try(X4[!is.na(X4$Z_lm_L) & X4$Z_lm_L >= 2, ]$Score_L <- "Deletion Enhancer")
- try(X4[!is.na(X4$Z_lm_L) & X4$Z_lm_L <= -2, ]$Score_L <- "Deletion Suppressor")
-
- X4$Score_K <- "No Effect"
- try(X4[is.na(X4$Z_lm_K), ]$Score_K <- "No Growth")
- try(X4[!is.na(X4$Z_lm_K) & X4$Z_lm_K >= 2, ]$Score_K <- "Deletion Suppressor")
- try(X4[!is.na(X4$Z_lm_K) & X4$Z_lm_K <= -2, ]$Score_K <- "Deletion Enhancer")
-
- # Express the na data as 0.001 in X4
- X4[is.na(X4$Z_lm_L), ]$Z_lm_L <- 0.001
- X4[is.na(X4$Z_lm_K), ]$Z_lm_K <- 0.001
- X4$Rank_L <- rank(X4$Z_lm_L)
- X4$Rank_K <- rank(X4$Z_lm_K)
- X4 <- X4[order(X4$OrfRep, decreasing = FALSE), ]
- colnames(X4) <- paste0(colnames(X4), "_X4")
- X <- cbind(X, X4)
- }
- if (length(study_nums) > 4) {
- X5$ORF <- X5$OrfRep
- X5$ORF <- gsub("_1", "", x = X5$ORF)
- X5$ORF <- gsub("_2", "", x = X5$ORF)
- X5$ORF <- gsub("_3", "", x = X5$ORF)
- X5$ORF <- gsub("_4", "", x = X5$ORF)
-
- X5$Score_L <- "No Effect"
- try(X5[is.na(X5$Z_lm_L), ]$Score_L <- "No Growth")
- try(X5[!is.na(X5$Z_lm_L) & X5$Z_lm_L >= 2, ]$Score_L <- "Deletion Enhancer")
- try(X5[!is.na(X5$Z_lm_L) & X5$Z_lm_L <= -2, ]$Score_L <- "Deletion Suppressor")
-
- X5$Score_K <- "No Effect"
- try(X5[is.na(X5$Z_lm_K), ]$Score_K <- "No Growth")
- try(X5[!is.na(X5$Z_lm_K) & X5$Z_lm_K >= 2, ]$Score_K <- "Deletion Suppressor")
- try(X5[!is.na(X5$Z_lm_K) & X5$Z_lm_K <= -2, ]$Score_K <- "Deletion Enhancer")
-
- # Express the na data as 0.001 in X5
- X5[is.na(X5$Z_lm_L), ]$Z_lm_L <- 0.001
- X5[is.na(X5$Z_lm_K), ]$Z_lm_K <- 0.001
- X5$Rank_L <- rank(X5$Z_lm_L)
- X5$Rank_K <- rank(X5$Z_lm_K)
- X5 <- X5[order(X5$OrfRep, decreasing = FALSE), ]
- colnames(X5) <- paste0(colnames(X5), "_X5")
- X <- cbind(X, X5)
- }
- X$ORF <- X$OrfRep_X1
- if (length(study_nums) > 1) {
- X$ORF <- gsub("_1", "", x = X$ORF)
- try(X[X$Gene_X1 == "", ]$Gene_X1 <- X[X$Gene_X1 == "", ]$OrfRep_X1)
- try(X[X$Gene_X2 == "", ]$Gene_X2 <- X[X$Gene_X2 == "", ]$OrfRep_X2)
- X_heatmap <-
- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
- colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
- colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
- colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
- colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2"]
-
- X_heatmap <- X_heatmap[, c(10, 1, 4, 5, 8, 9, 2, 3, 6, 7)]
- colnames(X_heatmap) <- gsub(pattern = "X1", replacement = Name1, colnames(X_heatmap))
- colnames(X_heatmap) <- gsub(pattern = "X2", replacement = Name2, colnames(X_heatmap))
- colnames(X_heatmap)[2] <- "Gene"
- }
- if (length(study_nums) > 2) {
- X$ORF <- gsub("_1", "", x = X$ORF)
- X$ORF <- gsub("_2", "", x = X$ORF)
- try(X[X$Gene_X1 == "", ]$Gene_X1 <- X[X$Gene_X1 == "", ]$OrfRep_X1)
- try(X[X$Gene_X2 == "", ]$Gene_X2 <- X[X$Gene_X2 == "", ]$OrfRep_X2)
- try(X[X$Gene_X3 == "", ]$Gene_X3 <- X[X$Gene_X3 == "", ]$OrfRep_X3)
-
- X_heatmap <-
- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
- colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
- colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
- colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
- colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
- colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
- colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3"]
- # Reorder columns
- X_heatmap <- X_heatmap[, c(14, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11)]
- colnames(X_heatmap) <- gsub(pattern = "X1", replacement = Name1, colnames(X_heatmap))
- colnames(X_heatmap) <- gsub(pattern = "X2", replacement = Name2, colnames(X_heatmap))
- colnames(X_heatmap) <- gsub(pattern = "X3", replacement = Name3, colnames(X_heatmap))
- colnames(X_heatmap)[2] <- "Gene"
- }
- if (length(study_nums) > 3) {
- X$ORF <- gsub("_1", "", x = X$ORF)
- X$ORF <- gsub("_2", "", x = X$ORF)
- X$ORF <- gsub("_3", "", x = X$ORF)
- try(X[X$Gene_X1 == "", ]$Gene_X1 <- X[X$Gene_X1 == "", ]$OrfRep_X1)
- try(X[X$Gene_X2 == "", ]$Gene_X2 <- X[X$Gene_X2 == "", ]$OrfRep_X2)
- try(X[X$Gene_X3 == "", ]$Gene_X3 <- X[X$Gene_X3 == "", ]$OrfRep_X3)
- try(X[X$Gene_X4 == "", ]$Gene_X4 <- X[X$Gene_X4 == "", ]$OrfRep_X4)
-
- X_heatmap <-
- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
- colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
- colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
- colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
- colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" |
- colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
- colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
- colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" |
- colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4"]
- # Reorder columns
- X_heatmap <- X_heatmap[, c(18, 1, 4, 5, 8, 9, 12, 13, 16, 17, 2, 3, 6, 7, 10, 11, 14, 15)]
- colnames(X_heatmap) <- gsub(pattern = "X1", replacement = Name1, colnames(X_heatmap))
- colnames(X_heatmap) <- gsub(pattern = "X2", replacement = Name2, colnames(X_heatmap))
- colnames(X_heatmap) <- gsub(pattern = "X3", replacement = Name3, colnames(X_heatmap))
- colnames(X_heatmap) <- gsub(pattern = "X4", replacement = Name4, colnames(X_heatmap))
- colnames(X_heatmap)[2] <- "Gene"
- }
- if (length(study_nums) > 4) {
- X$ORF <- gsub("_1", "", x = X$ORF)
- X$ORF <- gsub("_2", "", x = X$ORF)
- X$ORF <- gsub("_3", "", x = X$ORF)
- X$ORF <- gsub("_4", "", x = X$ORF)
- try(X[X$Gene_X1 == "", ]$Gene_X1 <- X[X$Gene_X1 == "", ]$OrfRep_X1)
- try(X[X$Gene_X2 == "", ]$Gene_X2 <- X[X$Gene_X2 == "", ]$OrfRep_X2)
- try(X[X$Gene_X3 == "", ]$Gene_X3 <- X[X$Gene_X3 == "", ]$OrfRep_X3)
- try(X[X$Gene_X4 == "", ]$Gene_X4 <- X[X$Gene_X4 == "", ]$OrfRep_X4)
- try(X[X$Gene_X5 == "", ]$Gene_X5 <- X[X$Gene_X5 == "", ]$OrfRep_X5)
-
- X_heatmap <-
- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
- colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
- colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
- colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
- colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" |
- colnames(X) == "Z_Shift_K_X5" | colnames(X) == "Z_lm_K_X5" |
- colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
- colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
- colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" |
- colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4" |
- colnames(X) == "Z_Shift_L_X5" | colnames(X) == "Z_lm_L_X5"]
- # Reorder columns
- X_heatmap <- X_heatmap[, c(22, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 2, 3, 6, 7, 10, 11, 14, 15, 18, 19)]
- colnames(X_heatmap) <- gsub(pattern = "X1", replacement = Name1, colnames(X_heatmap))
- colnames(X_heatmap) <- gsub(pattern = "X2", replacement = Name2, colnames(X_heatmap))
- colnames(X_heatmap) <- gsub(pattern = "X3", replacement = Name3, colnames(X_heatmap))
- colnames(X_heatmap) <- gsub(pattern = "X4", replacement = Name4, colnames(X_heatmap))
- colnames(X_heatmap) <- gsub(pattern = "X5", replacement = Name5, colnames(X_heatmap))
- colnames(X_heatmap)[2] <- "Gene"
- }
- # Theme elements for plots
- theme_Publication <- function(base_size = 14, base_family = "sans") {
- (theme_foundation(base_size = base_size, base_family = base_family) +
- theme(
- plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
- text = element_text(),
- panel.background = element_rect(colour = NA),
- plot.background = element_rect(colour = NA),
- panel.border = element_rect(colour = NA),
- axis.title = element_text(face = "bold", size = rel(1)),
- axis.title.y = element_text(angle = 90, vjust = 2),
- axis.title.x = element_text(vjust = -0.2),
- axis.text = element_text(),
- axis.line = element_line(colour = "black"),
- axis.ticks = element_line(),
- panel.grid.major = element_line(colour = "#f0f0f0"),
- panel.grid.minor = element_blank(),
- legend.key = element_rect(colour = NA),
- legend.position = "bottom",
- legend.direction = "horizontal",
- legend.key.size = unit(0.2, "cm"),
- legend.spacing = unit(0, "cm"),
- legend.title = element_text(face = "italic"),
- plot.margin = unit(c(10, 5, 5, 5), "mm"),
- strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
- strip.text = element_text(face = "bold")
- )
- )
- }
- scale_fill_Publication <- function(...) {
- library(scales)
- discrete_scale(
- "fill",
- "Publication",
- manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")),
- ...
- )
- }
- scale_colour_Publication <- function(...) {
- discrete_scale(
- "colour",
- "Publication",
- manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")),
- ...
- )
- }
- theme_Publication_legend_right <- function(base_size = 14, base_family = "sans") {
- (theme_foundation(base_size = base_size, base_family = base_family) +
- theme(
- plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
- text = element_text(),
- panel.background = element_rect(colour = NA),
- plot.background = element_rect(colour = NA),
- panel.border = element_rect(colour = NA),
- axis.title = element_text(face = "bold", size = rel(1)),
- axis.title.y = element_text(angle = 90, vjust = 2),
- axis.title.x = element_text(vjust = -0.2),
- axis.text = element_text(),
- axis.line = element_line(colour = "black"),
- axis.ticks = element_line(),
- panel.grid.major = element_line(colour = "#f0f0f0"),
- panel.grid.minor = element_blank(),
- legend.key = element_rect(colour = NA),
- legend.position = "right",
- legend.direction = "vertical",
- legend.key.size = unit(0.5, "cm"),
- legend.spacing = unit(0, "cm"),
- legend.title = element_text(face = "italic"),
- plot.margin = unit(c(10, 5, 5, 5), "mm"),
- strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
- strip.text = element_text(face = "bold")
- )
- )
- }
- scale_fill_Publication <- function(...) {
- discrete_scale(
- "fill",
- "Publication",
- manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")),
- ...
- )
- }
- scale_colour_Publication <- function(...) {
- discrete_scale(
- "colour",
- "Publication",
- manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")),
- ...
- )
- }
- Ontology <- get_ontology(file = ontology_file, propagate_relationships = "is_a", extract_tags = "minimal")
- print(Ontology)
- # All ORFs associated with GO term
- GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)
- # Terms is the GO term list jwr moved up to TAABLES
- Terms <- read.delim(file = sgd_terms_tfile,
- header = FALSE,
- quote = "",
- col.names = c("GO_ID", "GO_Term", "GO_Aspect", "GO_Term_Definition")
- )
- colormapbreaks <- c(-12, -10, -8, -6, -4, -2, 2, 4, 6, 8, 10, 12)
- for (s in 1:dim(XX3)[1]) {
- # Ontology <-
- # get_ontology(file = "Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",
- # propagate_relationships = "is_a", extract_tags = "minimal")
- # Ontology_Everything <-
- # get_ontology(file = "Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",
- # propagate_relationships = "is_a", extract_tags = "everything")
- #
- # GO_ID_Arg <- "GO:0006325"
- GO_ID_Arg_loop <- as.character(XX3[s, 1])
- GOTerm_parent <- get_descendants(Ontology, roots = GO_ID_Arg_loop)
- # GOTerm_parent <- get_descendants(Ontology,roots = "GO:0006325")
- # Only make plots if parent term has fewer than 500 children
- Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
-
- if (length(GOTerm_parent) > 100) {
- next()
- }
-
- Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]])) # TODO why twice?
- if (Parent_Size < 2) {
- next()
- }
- if (Parent_Size > 2000) {
- pdf(
- file = file.path(output_dir, paste0(XX3[s, 2], ".pdf")),
- width = 12,
- height = 45,
- onefile = TRUE
- )
- for (i in 1:length(GOTerm_parent)) {
- GO_Term <- GOTerm_parent[i]
- GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
- GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term)
- # Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term, ]
- All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
- Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ]
- X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]])
- if (dim(Genes_Annotated_to_Term)[1] > 2) {
- try(heatmap.2(
- x = X0,
- Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
- dendrogram = "row", cexCol = 0.7, cexRow = 0.5, scale = "none",
- breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1,
- ylab = "Gene",
- cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE,
- keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
- na.color = "red", col = brewer.pal(11, "PuOr"),
- main = GO_Term_Name,
- # ColSideColors = ev_repeat,
- labRow = as.character(Genes_Annotated_to_Term$Gene)
- ))
- }
- }
- dev.off()
- }
-
- if (Parent_Size >= 1000 && Parent_Size <= 2000) {
- pdf(
- file = file.path(output_dir, paste0(XX3[s, 2], ".pdf")),
- width = 12,
- height = 35,
- onefile = TRUE
- )
- for (i in 1:length(GOTerm_parent)) {
- GO_Term <- GOTerm_parent[i]
- GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
- GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term)
- # Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term, ]
- All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
- Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ]
- X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]])
- if (dim(Genes_Annotated_to_Term)[1] > 2) {
- try(heatmap.2(
- x = X0,
- Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
- dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none",
- breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1,
- ylab = "Gene",
- cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE,
- keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
- na.color = "red", col = brewer.pal(11, "PuOr"),
- main = GO_Term_Name,
- # ColSideColors = ev_repeat,
- labRow = as.character(Genes_Annotated_to_Term$Gene)
- ))
- }
- }
- dev.off()
- }
- if (Parent_Size >= 500 && Parent_Size <= 1000) {
-
- pdf(
- file = file.path(output_dir, paste0(XX3[s, 2], ".pdf")),
- width = 12,
- height = 30,
- onefile = TRUE
- )
- for (i in 1:length(GOTerm_parent)) {
- GO_Term <- GOTerm_parent[i]
- GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
- GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term)
- # Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term, ]
- All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
- Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ]
- X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]])
- if (dim(Genes_Annotated_to_Term)[1] > 2) {
- try(heatmap.2(
- x = X0,
- Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
- dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none",
- breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1,
- ylab = "Gene",
- cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE,
- keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
- na.color = "red", col = brewer.pal(11, "PuOr"),
- main = GO_Term_Name,
- # ColSideColors = ev_repeat,
- labRow = as.character(Genes_Annotated_to_Term$Gene)
- ))
- }
- }
- dev.off()
- }
- if (Parent_Size >= 200 && Parent_Size <= 500) {
-
- pdf(
- file = file.path(output_dir, paste0(XX3[s, 2], ".pdf")),
- width = 12,
- height = 25,
- onefile = TRUE
- )
- for (i in 1:length(GOTerm_parent)) {
- GO_Term <- GOTerm_parent[i]
- GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
- GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term)
- # Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term, ]
- All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
- Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ]
- X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]])
- if (dim(Genes_Annotated_to_Term)[1] > 2) {
- try(heatmap.2(
- x = X0,
- Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
- dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
- breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1,
- ylab = "Gene",
- cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE,
- keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
- na.color = "red", col = brewer.pal(11, "PuOr"),
- main = GO_Term_Name,
- # ColSideColors = ev_repeat,
- labRow = as.character(Genes_Annotated_to_Term$Gene)
- ))
- }
- }
- dev.off()
- }
- if (Parent_Size >= 100 && Parent_Size <= 200) {
-
- pdf(
- file = file.path(output_dir, paste0(XX3[s, 2], ".pdf")),
- width = 12,
- height = 20,
- onefile = TRUE
- )
-
- for (i in 1:length(GOTerm_parent)) {
- GO_Term <- GOTerm_parent[i]
- GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
- GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term)
- # Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term, ]
- All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
- Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ]
- X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]])
- if (dim(Genes_Annotated_to_Term)[1] > 2) {
- try(heatmap.2(
- x = X0,
- Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
- dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
- breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1,
- ylab = "Gene",
- cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE,
- keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
- na.color = "red", col = brewer.pal(11, "PuOr"),
- main = GO_Term_Name,
- # ColSideColors = ev_repeat,
- labRow = as.character(Genes_Annotated_to_Term$Gene)
- ))
- }
- }
- dev.off()
- }
- if (Parent_Size >= 60 && Parent_Size <= 100) {
-
- pdf(
- file = file.path(output_dir, paste0(XX3[s, 2], ".pdf")),
- width = 12,
- height = 15,
- onefile = TRUE
- )
-
- for (i in 1:length(GOTerm_parent)) {
- GO_Term <- GOTerm_parent[i]
- GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
- GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term)
- # Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term, ]
- All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
- Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ]
- X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]])
- if (dim(Genes_Annotated_to_Term)[1] > 2) {
- try(heatmap.2(
- x = X0,
- Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
- dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
- breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1,
- ylab = "Gene",
- cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE,
- keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
- na.color = "red", col = brewer.pal(11, "PuOr"),
- main = GO_Term_Name,
- # ColSideColors = ev_repeat,
- labRow = as.character(Genes_Annotated_to_Term$Gene)
- ))
- }
- }
- dev.off()
- }
- if (Parent_Size >= 30 && Parent_Size <= 60) {
-
- pdf(
- file = file.path(output_dir, paste0(XX3[s, 2], ".pdf")),
- width = 12,
- height = 10,
- onefile = TRUE
- )
-
- for (i in 1:length(GOTerm_parent)) {
- GO_Term <- GOTerm_parent[i]
- GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
- GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term)
- # Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term, ]
- All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
- Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ]
- X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]])
- if (dim(Genes_Annotated_to_Term)[1] > 2) {
- try(heatmap.2(
- x = X0,
- Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
- dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
- breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1,
- ylab = "Gene",
- cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE,
- keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
- na.color = "red", col = brewer.pal(11, "PuOr"),
- main = GO_Term_Name,
- # ColSideColors = ev_repeat,
- labRow = as.character(Genes_Annotated_to_Term$Gene)
- ))
- }
- if (dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0) {
- try(heatmap.2(
- x = X0,
- Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
- dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
- breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1,
- ylab = "Gene",
- cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE,
- keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
- na.color = "red", col = brewer.pal(11, "PuOr"),
- main = GO_Term_Name,
- # ColSideColors = ev_repeat,
- labRow = as.character(Genes_Annotated_to_Term$Gene)
- ))
- }
- }
-
- dev.off()
- }
- if (Parent_Size >= 3 && Parent_Size <= 30) {
-
- pdf(
- file = file.path(output_dir, paste0(XX3[s, 2], ".pdf")),
- width = 12,
- height = 7,
- onefile = TRUE
- )
-
- for (i in 1:length(GOTerm_parent)) {
- GO_Term <- GOTerm_parent[i]
- GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
- GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term)
- # Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term, ]
- All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
- Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ]
- X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]])
- if (dim(Genes_Annotated_to_Term)[1] > 2) {
- try(heatmap.2(
- x = X0,
- Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
- dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
- breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1,
- ylab = "Gene",
- cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE,
- keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
- na.color = "red", col = brewer.pal(11, "PuOr"),
- main = GO_Term_Name,
- # ColSideColors = ev_repeat,
- labRow = as.character(Genes_Annotated_to_Term$Gene)
- ))
- }
- if (dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0) {
- try(heatmap.2(
- x = X0,
- Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
- dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
- breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1,
- ylab = "Gene",
- cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE,
- keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
- na.color = "red", col = brewer.pal(11, "PuOr"),
- main = GO_Term_Name,
- # ColSideColors = ev_repeat,
- labRow = as.character(Genes_Annotated_to_Term$Gene)
- ))
- }
- }
- dev.off()
- }
- if (Parent_Size == 2) {
-
- pdf(
- file = file.path(output_dir, paste0(XX3[s, 2], ".pdf")),
- width = 12,
- height = 7,
- onefile = TRUE
- )
-
- for (i in 1:length(GOTerm_parent)) {
- GO_Term <- GOTerm_parent[i]
- GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
- GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term)
- # Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term, ]
- All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
- Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ]
- X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]])
- if (dim(Genes_Annotated_to_Term)[1] > 2) {
- try(heatmap.2(
- x = X0,
- Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
- dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
- breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1,
- ylab = "Gene",
- cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE,
- keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
- na.color = "red", col = brewer.pal(11, "PuOr"),
- main = GO_Term_Name,
- # ColSideColors = ev_repeat,
- labRow = as.character(Genes_Annotated_to_Term$Gene)
- ))
- }
- if (dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0) {
- try(heatmap.2(
- x = X0,
- Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
- dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
- breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1,
- ylab = "Gene",
- cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE,
- keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
- na.color = "red", col = brewer.pal(11, "PuOr"),
- main = GO_Term_Name,
- # ColSideColors = ev_repeat,
- labRow = as.character(Genes_Annotated_to_Term$Gene)
- ))
- }
- }
- dev.off()
- }
- }
|