Rollup before parallelization

This commit is contained in:
2024-08-14 23:20:29 -04:00
parent 1ba1f14537
commit 6992d5eec0
8 changed files with 2517 additions and 2434 deletions

View File

@@ -1,28 +1,27 @@
#!/usr/bin/env Rscript
# This script will make homology heatmaps for the REMc analysis
# This script didn't have any hard set inputs so I didn't bother
library(RColorBrewer)
library(gplots)
library(tidyverse)
library("RColorBrewer")
library("gplots")
library("tidyverse")
# Collect the trailing (user-supplied) command-line arguments as a character vector
args <- commandArgs(TRUE)
# NOTE(review): this argument layout appears next to a second, differently ordered
# layout in the same listing - confirm which one is current before relying on it
# Need to give the input "finalTable.csv" file after running REMc generated by eclipse
inputFinalTable <- file.path(args[1])
# Give the DAmP_list.txt as the second argument - will color the gene names differently
# (R names are case-sensitive: the vector defined above is `args`; `Args` does not exist)
DAmPs <- file.path(args[2])
DAmP_list <- read.delim(file = DAmPs, header = FALSE, stringsAsFactors = FALSE)
# Give the yeast human homology mapping as the third argument - will add the genes to the finalTable and use info for heatmaps
mapFile <- file.path(args[3])
mapping <- read.csv(file = mapFile, stringsAsFactors = FALSE)
# Define the output path for the heatmaps (fourth argument) - create this folder first - in linux terminal in the working folder use > mkdir filename_heatmaps
outputPath <- file.path(args[4])
# Positional arguments, taken from the `args` vector returned by commandArgs(TRUE):
#   1 = output directory, 2 = finalTable.csv, 3 = damp list, 4 = homology mapping
# R names are case-sensitive, so `Args` would be an undefined object; always use `args`
if (length(args) < 4) {
  stop("Expected 4 arguments: output_path final_table damp_list map_file", call. = FALSE)
}
# Output directory for the generated files (first argument)
output_path <- file.path(args[1])
# Need to give the input "finalTable.csv" file after running REMc generated by eclipse
final_table <- file.path(args[2])
# Give the damp_list.txt as the third argument - will color the gene names differently
damps <- file.path(args[3])
# The list file has no header row; its columns are read as character, not factor
damp_list <- read.delim(file = damps, header = FALSE, stringsAsFactors = FALSE)
# Give the yeast human homology mapping as the fourth argument - will add the genes to the finalTable and use info for heatmaps
map_file <- file.path(args[4])
mapping <- read.csv(file = map_file, stringsAsFactors = FALSE)
# Load the finalTable CSV (header row, comma separated - the read.csv defaults)
# into a data frame, keeping text columns as character vectors
hmapfile <- data.frame(
  read.csv(inputFinalTable, stringsAsFactors = FALSE)
)
hmapfile <- data.frame(
  read.csv(final_table, stringsAsFactors = FALSE)
)
# Separate copy of the table that is used for the human homolog mapping
hmapfile_map <- hmapfile
@@ -46,11 +45,11 @@ hmapfile_w_homolog <- full_join(hmapfile_map, mapping, by = c("ORFMatch" = "ense
# Drop the rows whose likelihood is NA
# (`FASLE` was a misspelling of FALSE and is an undefined object; negate is.na() directly)
hmapfile_w_homolog <- hmapfile_w_homolog[!is.na(hmapfile_w_homolog$likelihood), ]
# Write csv with all info from mapping file
write.csv(hmapfile_w_homolog, file.path(outputPath, paste0(inputFinalTable, "_WithHomologAll.csv")), row.names = FALSE)
write.csv(hmapfile_w_homolog, file.path(output_path, paste0(final_table, "_WithHomologAll.csv")), row.names = FALSE)
# Remove the non matches and output another mapping file - this is also one used to make heatmaps
hmapfile_w_homolog <- hmapfile_w_homolog[!is.na(hmapfile_w_homolog$external_gene_name_Human), ]
# row.names = FALSE is an argument of write.csv(); placed inside file.path() it was
# treated as one more path component instead of suppressing the row names
write.csv(hmapfile_w_homolog, file.path(outputPath, paste0(inputFinalTable, "_WithHomologMatchesOnly.csv")), row.names = FALSE)
write.csv(hmapfile_w_homolog, file.path(output_path, paste0(final_table, "_WithHomologMatchesOnly.csv")), row.names = FALSE)
# Add human gene name to the Gene column
hmapfile_w_homolog$Gene <- paste(hmapfile_w_homolog$Gene, hmapfile_w_homolog$external_gene_name_Human, sep = "/")
@@ -176,14 +175,14 @@ if (grepl("Shift", colnames(hmapfile)[4], fixed = TRUE) == FALSE) {
# m <- 0
# Names of the columns from position 4 up to (number of columns - 3)
colnames_edit <- as.character(colnames(hmapfile)[4:(ncol(hmapfile) - 3)])
colnames(DAmP_list)[1] <- "ORF"
# Every row starts as "YKO"; rows whose ORF is in the DAmP list are relabelled "YKD" below
hmapfile$DAmPs <- "YKO"
colnames(damp_list)[1] <- "ORF"
hmapfile$damps <- "YKO"
colnames(hmapfile)[2] <- "ORF"
# Assign through the column vector instead of `df[rows, ]$col <- value`: the row-subset
# form raises an error when no row is selected, which previously had to be hidden by try()
hmapfile$DAmPs[hmapfile$ORF %in% DAmP_list$ORF] <- "YKD"
# X <- X[order(X$DAmPs,decreasing = TRUE),]
hmapfile$damps[hmapfile$ORF %in% damp_list$ORF] <- "YKD"
# X <- X[order(X$damps,decreasing = TRUE),]
# color2: "black" for YKO rows, "red" for YKD rows
hmapfile$color2 <- NA
hmapfile$color2[hmapfile$DAmPs == "YKO"] <- "black"
hmapfile$color2[hmapfile$DAmPs == "YKD"] <- "red"
hmapfile$color2[hmapfile$damps == "YKO"] <- "black"
hmapfile$color2[hmapfile$damps == "YKD"] <- "red"
# color: set per orthology type; rows that match no type keep NA
hmapfile$color <- NA
# which() keeps only the TRUE positions, so NA orthology types are skipped rather than
# making the whole assignment fail
hmapfile$color[which(hmapfile$hsapiens_homolog_orthology_type == "ortholog_many2many")] <- "#F8766D"
@@ -231,7 +230,7 @@ for (i in 1:num_unique_clusts) {
if (cluster_length != 1) {
X0 <- as.matrix(cluster_data[, 4:(length(hmapfile[1, ]) - 6)])
if (cluster_length >= 2001) {
mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
mypath <- file.path(output_path, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
pdf(file = mypath, height = 20, width = 15)
heatmap.2(
x = X0,
@@ -251,7 +250,7 @@ for (i in 1:num_unique_clusts) {
dev.off()
}
if (cluster_length >= 201 && cluster_length <= 2000) {
mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
mypath <- file.path(output_path, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
pdf(file = mypath, height = 15, width = 12)
heatmap.2(
x = X0,
@@ -270,7 +269,7 @@ for (i in 1:num_unique_clusts) {
dev.off()
}
if (cluster_length >= 150 && cluster_length <= 200) {
mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
mypath <- file.path(output_path, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
pdf(file = mypath, height = 12, width = 12)
heatmap.2(
x = X0,
@@ -288,7 +287,7 @@ for (i in 1:num_unique_clusts) {
dev.off()
}
if (cluster_length >= 101 && cluster_length <= 149) {
mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
mypath <- file.path(output_path, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
pdf(file = mypath, height = 12, width = 12)
heatmap.2(
x = X0,
@@ -306,7 +305,7 @@ for (i in 1:num_unique_clusts) {
dev.off()
}
if (cluster_length >= 60 && cluster_length <= 100) {
mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
mypath <- file.path(output_path, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
pdf(file = mypath, height = 12, width = 12)
heatmap.2(
x = X0,
@@ -324,7 +323,7 @@ for (i in 1:num_unique_clusts) {
dev.off()
}
if (cluster_length <= 59 && cluster_length >= 30) {
mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
mypath <- file.path(output_path, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
pdf(file = mypath, height = 9, width = 12)
heatmap.2(
x = X0,
@@ -342,7 +341,7 @@ for (i in 1:num_unique_clusts) {
dev.off()
}
if (cluster_length <= 29) {
mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
mypath <- file.path(output_path, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
pdf(file = mypath, height = 7, width = 12)
heatmap.2(
x = X0,