Lint R scripts

This commit is contained in:
2024-08-13 15:59:18 -04:00
parent f190967383
commit 79862ddab4
4 changed files with 185 additions and 170 deletions

1
.gitignore vendored
View File

@@ -3,3 +3,4 @@ manual.odt
mwe
centos-upgrade-plan.txt
workflow/out/
workflow/scans/

View File

@@ -10,25 +10,25 @@ library(sos)
args <- commandArgs(TRUE)
if (length(args) >= 1) {
finalTable <- args[1]
finalTable <- file.path(args[1])
} else {
finalTable <- "REMcRdy_lm_only.csv-finalTable.csv" # for legacy workflow
}
if (length(args) >= 2) {
shiftFile <- args[2]
shiftFile <- file.path(args[2])
} else {
shiftFile <- "Shift_only.csv" # for legacy workflow
}
if (length(args) >= 3) {
studyInfo <- args[3]
studyInfo <- file.path(args[3])
} else {
studyInfo <- "../Code/StudyInfo.csv" # for legacy workflow
}
if (length(args) >= 4) {
output <- args[4]
output <- file.path(args[4])
} else {
output <- "REMcHeatmaps/REMcWithShift.csv" # for legacy workflow
}

View File

@@ -171,7 +171,7 @@ for (i in 1:num_unique_clusts) {
if (cluster_length != 1) {
X0 <- as.matrix(cluster_data[, 4:(length(hmapfile[1, ]) - 2)])
if (cluster_length >= 2001) {
mypath <- file.path(outDir, paste("cluster_", gsub(" ", "", cluster), sep = ""), ".pdf")
mypath <- file.path(outDir, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
pdf(file = mypath, height = 20, width = 15)
heatmap.2(
x = X0,
@@ -191,7 +191,7 @@ for (i in 1:num_unique_clusts) {
dev.off()
}
if (cluster_length >= 201 && cluster_length <= 2000) {
mypath <- file.path(outDir, paste("cluster_", gsub(" ", "", cluster), sep = ""), ".pdf")
mypath <- file.path(outDir, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
pdf(file = mypath, height = 15, width = 12)
heatmap.2(
x = X0,
@@ -210,7 +210,7 @@ for (i in 1:num_unique_clusts) {
dev.off()
}
if (cluster_length >= 150 && cluster_length <= 200) {
mypath <- file.path(outDir, paste("cluster_", gsub(" ", "", cluster), sep = ""), ".pdf")
mypath <- file.path(outDir, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
pdf(file = mypath, height = 12, width = 12)
heatmap.2(
x = X0,
@@ -228,7 +228,7 @@ for (i in 1:num_unique_clusts) {
dev.off()
}
if (cluster_length >= 101 && cluster_length <= 149) {
mypath <- file.path(outDir, paste("cluster_", gsub(" ", "", cluster), sep = ""), ".pdf")
mypath <- file.path(outDir, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
# Open the PDF device for this cluster; only file, height and width are passed
pdf(file = mypath, height = 12, width = 12)
heatmap.2(
x = X0,
@@ -246,7 +246,7 @@ for (i in 1:num_unique_clusts) {
dev.off()
}
if (cluster_length >= 60 && cluster_length <= 100) {
mypath <- file.path(outDir, paste("cluster_", gsub(" ", "", cluster), sep = ""), ".pdf")
mypath <- file.path(outDir, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
pdf(file = mypath, height = 12, width = 12)
heatmap.2(
x = X0,
@@ -264,7 +264,7 @@ for (i in 1:num_unique_clusts) {
dev.off()
}
if (cluster_length <= 59 && cluster_length >= 30) {
mypath <- file.path(outDir, paste("cluster_", gsub(" ", "", cluster), sep = ""), ".pdf")
mypath <- file.path(outDir, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
pdf(file = mypath, height = 9, width = 12)
heatmap.2(
x = X0,
@@ -282,7 +282,7 @@ for (i in 1:num_unique_clusts) {
dev.off()
}
if (cluster_length <= 29) {
mypath <- file.path(outDir, paste("cluster_", gsub(" ", "", cluster), sep = ""), ".pdf")
mypath <- file.path(outDir, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
pdf(file = mypath, height = 7, width = 12)
heatmap.2(
x = X0,

View File

@@ -8,18 +8,18 @@ library(tidyverse)
args <- commandArgs(TRUE)
# Need to give the input "finalTable.csv" file after running REMc generated by eclipse
inputFinalTable <- args[1]
inputFinalTable <- file.path(args[1])
# Give the DAmP_list.txt as the third argument - will color the gene names differently
DAmPs <- Args[2]
DAmP_list <- read.delim(file=DAmPs,header=F,stringsAsFactors = F)
# R is case-sensitive: the command-line arguments are stored in `args`
DAmPs <- file.path(args[2])
DAmP_list <- read.delim(file = DAmPs, header = FALSE, stringsAsFactors = FALSE)
# Give the yeast human homology mapping as the fourth argument - will add the genes to the finalTable and use info for heatmaps
mapFile <- Args[3]
mapping <- read.csv(file=mapFile,stringsAsFactors = F)
# R is case-sensitive: the command-line arguments are stored in `args`
mapFile <- file.path(args[3])
mapping <- read.csv(file = mapFile, stringsAsFactors = FALSE)
# Define the output path for the heatmaps - create this folder first - in linux terminal in the working folder use > mkdir filename_heatmaps
outputPath <- Args[4]
# R is case-sensitive: the command-line arguments are stored in `args`
outputPath <- file.path(args[4])
# Read in finalTablewithShift
hmapfile <- data.frame(read.csv(file = inputFinalTable, header = TRUE, sep = ",", stringsAsFactors = FALSE))
@@ -43,14 +43,14 @@ hmapfile_map$ORFMatch <- gsub("_4","",x=hmapfile_map$ORFMatch)
hmapfile_w_homolog <- full_join(hmapfile_map, mapping, by = c("ORFMatch" = "ensembl_gene_id"))
# Remove matches that are not from the finalTable
hmapfile_w_homolog <- hmapfile_w_homolog[is.na(hmapfile_w_homolog$likelihood) == F,]
# Keep only the rows whose likelihood value is not missing
hmapfile_w_homolog <- hmapfile_w_homolog[!is.na(hmapfile_w_homolog$likelihood), ]
# Write csv with all info from mapping file
write.csv(hmapfile_w_homolog,file=paste(outputPath,"/",inputFinalTable,"_WithHomologAll.csv",sep=""),row.names = F)
write.csv(hmapfile_w_homolog, file.path(outputPath, paste(inputFinalTable, "_WithHomologAll.csv", sep = "")), row.names = FALSE)
# Remove the non matches and output another mapping file - this is also one used to make heatmaps
hmapfile_w_homolog <- hmapfile_w_homolog[is.na(hmapfile_w_homolog$external_gene_name_Human) == F,]
write.csv(hmapfile_w_homolog,file=paste(outputPath,"/",inputFinalTable,"_WithHomologMatchesOnly.csv",sep=""),row.names = F)
# Keep only the rows that have a human gene name, then write them out.
# row.names = FALSE belongs to write.csv(), not to file.path().
hmapfile_w_homolog <- hmapfile_w_homolog[!is.na(hmapfile_w_homolog$external_gene_name_Human), ]
write.csv(hmapfile_w_homolog, file.path(outputPath, paste(inputFinalTable, "_WithHomologMatchesOnly.csv", sep = "")), row.names = FALSE)
# Add human gene name to the Gene column
hmapfile_w_homolog$Gene <- paste(hmapfile_w_homolog$Gene, hmapfile_w_homolog$external_gene_name_Human, sep = "/")
@@ -58,7 +58,8 @@ hmapfile_w_homolog$Gene <- paste(hmapfile_w_homolog$Gene,hmapfile_w_homolog$exte
# Only keep the finalTable file columns and the homology info
hmap_len <- dim(hmapfile)[2]
hmapfile_w_homolog_remake <- cbind(hmapfile_w_homolog[,1:hmap_len], hsapiens_homolog_orthology_type=hmapfile_w_homolog$hsapiens_homolog_orthology_type)
hmapfile_w_homolog_remake <-
cbind(hmapfile_w_homolog[, 1:hmap_len], hsapiens_homolog_orthology_type = hmapfile_w_homolog$hsapiens_homolog_orthology_type)
hmapfile <- hmapfile_w_homolog_remake
# Set NAs to NA
@@ -70,12 +71,13 @@ hmapfile[hmapfile == -0.001] <- NA
# Select the number of rows based on the number of genes
num_total_genes <- length(hmapfile[, 1])
# break out the cluster names so each part of the cluster origin can be accessed
# line below removed because it adds to many genes to clusters when going past 1-0-10 since it cannot differentiate between 1-0-1 and 1-0-10 when using grepl.
# Break out the cluster names so each part of the cluster origin can be accessed
# Line below removed because it adds too many genes to clusters when going past 1-0-10
# since it cannot differentiate between 1-0-1 and 1-0-10 when using grepl.
# hmapfile$cluster.origin = gsub(" ","",x = hmapfile$cluster.origin)
hmapfile$cluster.origin = gsub(";"," ;",x=hmapfile$cluster.origin)
hmapfile$cluster.origin = strsplit(hmapfile$cluster.origin,';')
hmapfile$cluster.origin <- gsub(";", " ;", x = hmapfile$cluster.origin)
hmapfile$cluster.origin <- strsplit(hmapfile$cluster.origin, ";")
# use tail(x,n) for accessing the outward most cluster
clust_rounds <- 0
@@ -94,7 +96,8 @@ num_unique_clusts <- length(unique_clusts)
# Base the color key on a statistical analysis of the L and K data
# need to create "breaks" to set the color key, need to have 12 different breaks (for 11 colors)
# scale() will calculate the mean and standard deviation of the entire vector, then "scale" each element by those values by subtracting the mean and dividing by the sd.
# scale() will calculate the mean and standard deviation of the entire vector
# then "scale" each element by those values by subtracting the mean and dividing by the sd
# hmapfile[,4:(length(hmapfile[1,]) - 2)] <- scale(hmapfile[,4:(length(hmapfile[1,]) - 2)])
@@ -142,12 +145,16 @@ print(KEY_MIN)
print(L_MAX)
#print(L_Multiplier)
colormapbreaks <- c(KEY_MIN,KEY_MIN*(5/6),KEY_MIN*(4/6),KEY_MIN*(3/6),KEY_MIN*(2/6),KEY_MIN*(1/6),KEY_MAX*(1/6),KEY_MAX*(2/6),KEY_MAX*(3/6),KEY_MAX*(4/6),KEY_MAX*(5/6),KEY_MAX)
colormapbreaks <- c(KEY_MIN, KEY_MIN * (5 / 6), KEY_MIN * (4 / 6), KEY_MIN * (3 / 6),
KEY_MIN * (2 / 6), KEY_MIN * (1 / 6), KEY_MAX * (1 / 6), KEY_MAX * (2 / 6),
KEY_MAX * (3 / 6), KEY_MAX * (4 / 6), KEY_MAX * (5 / 6), KEY_MAX)
# print(colormapbreaks)
# Probably should give a way to detect shift in case it is not in the first row... (maybe just grepl for the whole column name?)
# However since also using this to amend the first part. Could possibly identify all the ones that contain the word shift and then create an object containing just those numbers
# then could just use these values and create spaces only between interaction values - possibly could get rid of redundant shift values if we don't want to view these
# However since also using this to amend the first part.
# Could possibly identify all the ones that contain the word shift and then create an object containing just those numbers
# then could just use these values and create spaces only between interaction values
# possibly could get rid of redundant shift values if we don't want to view these
# could we pool all the shift data/average it?
if (grepl("Shift", colnames(hmapfile)[4], fixed = TRUE) == TRUE) {
even_columns <- seq(from = 2, to = (length(hmapfile[1, ]) - 7), by = 2)
@@ -224,7 +231,7 @@ for(i in 1:num_unique_clusts){
if (cluster_length != 1) {
X0 <- as.matrix(cluster_data[, 4:(length(hmapfile[1, ]) - 6)])
if (cluster_length >= 2001) {
mypath = file.path(outputPath,paste("cluster_",gsub(" ","",cluster), ".pdf",sep=""))
mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
pdf(file = mypath, height = 20, width = 15)
heatmap.2(
x = X0,
@@ -238,12 +245,13 @@ for(i in 1:num_unique_clusts){
na.color = "red", col = brewer.pal(11, "PuOr"),
main = cluster,
# ColSideColors = ev_repeat,
labRow=as.character(cluster_data$Gene), labCol=colnames_edit, colRow=cluster_data$color2,RowSideColors=cluster_data$color)
labRow = as.character(cluster_data$Gene), labCol = colnames_edit, colRow = cluster_data$color2, RowSideColors = cluster_data$color
)
# abline(v = 0.5467,col = "black")
dev.off()
}
if (cluster_length >= 201 && cluster_length <= 2000) {
mypath = file.path(outputPath,paste("cluster_",gsub(" ","",cluster), ".pdf",sep=""))
mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
pdf(file = mypath, height = 15, width = 12)
heatmap.2(
x = X0,
@@ -256,12 +264,13 @@ for(i in 1:num_unique_clusts){
keysize = 0.7, trace = "none", density.info = c("none"), margins = c(10, 8),
na.color = "red", col = brewer.pal(11, "PuOr"),
main = cluster,
labRow=as.character(cluster_data$Gene), labCol=colnames_edit, colRow=cluster_data$color2,RowSideColors=cluster_data$color)
labRow = as.character(cluster_data$Gene), labCol = colnames_edit, colRow = cluster_data$color2, RowSideColors = cluster_data$color
)
# abline(v = 0.5316,col = "black")
dev.off()
}
if (cluster_length >= 150 && cluster_length <= 200) {
mypath = file.path(outputPath,paste("cluster_",gsub(" ","",cluster), ".pdf",sep=""))
mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
pdf(file = mypath, height = 12, width = 12)
heatmap.2(
x = X0,
@@ -274,12 +283,13 @@ for(i in 1:num_unique_clusts){
keysize = 1, trace = "none", density.info = c("none"), margins = c(10, 8),
na.color = "red", col = brewer.pal(11, "PuOr"),
main = cluster,
labRow=as.character(cluster_data$Gene), labCol=colnames_edit, colRow=cluster_data$color2,RowSideColors=cluster_data$color)
labRow = as.character(cluster_data$Gene), labCol = colnames_edit, colRow = cluster_data$color2, RowSideColors = cluster_data$color
)
dev.off()
}
if (cluster_length >= 101 && cluster_length <= 149) {
mypath = file.path(outputPath,paste("cluster_",gsub(" ","",cluster), ".pdf",sep=""))
pdf(file=mypath,mypath,height=12,width=12)
mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
pdf(file = mypath, height = 12, width = 12)
heatmap.2(
x = X0,
Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
@@ -291,11 +301,12 @@ for(i in 1:num_unique_clusts){
keysize = 1, trace = "none", density.info = c("none"), margins = c(10, 8),
na.color = "red", col = brewer.pal(11, "PuOr"),
main = cluster,
labRow=as.character(cluster_data$Gene), labCol=colnames_edit, colRow=cluster_data$color2,RowSideColors=cluster_data$color)
labRow = as.character(cluster_data$Gene), labCol = colnames_edit, colRow = cluster_data$color2, RowSideColors = cluster_data$color
)
dev.off()
}
if (cluster_length >= 60 && cluster_length <= 100) {
mypath = file.path(outputPath,paste("cluster_",gsub(" ","",cluster), ".pdf",sep=""))
mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
pdf(file = mypath, height = 12, width = 12)
heatmap.2(
x = X0,
@@ -308,11 +319,12 @@ for(i in 1:num_unique_clusts){
keysize = 1, trace = "none", density.info = c("none"), margins = c(10, 8),
na.color = "red", col = brewer.pal(11, "PuOr"),
main = cluster,
labRow=as.character(cluster_data$Gene), labCol=colnames_edit, colRow=cluster_data$color2,RowSideColors=cluster_data$color)
labRow = as.character(cluster_data$Gene), labCol = colnames_edit, colRow = cluster_data$color2, RowSideColors = cluster_data$color
)
dev.off()
}
if (cluster_length <= 59 && cluster_length >= 30) {
mypath = file.path(outputPath,paste("cluster_",gsub(" ","",cluster), ".pdf",sep=""))
mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
pdf(file = mypath, height = 9, width = 12)
heatmap.2(
x = X0,
@@ -325,11 +337,12 @@ for(i in 1:num_unique_clusts){
keysize = 1, trace = "none", density.info = c("none"), margins = c(10, 8),
na.color = "red", col = brewer.pal(11, "PuOr"),
main = cluster,
labRow=as.character(cluster_data$Gene), labCol=colnames_edit, colRow=cluster_data$color2,RowSideColors=cluster_data$color)
labRow = as.character(cluster_data$Gene), labCol = colnames_edit, colRow = cluster_data$color2, RowSideColors = cluster_data$color
)
dev.off()
}
if (cluster_length <= 29) {
mypath = file.path(outputPath,paste("cluster_",gsub(" ","",cluster), ".pdf",sep=""))
mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
pdf(file = mypath, height = 7, width = 12)
heatmap.2(
x = X0,
@@ -343,7 +356,8 @@ for(i in 1:num_unique_clusts){
keysize = 1, trace = "none", density.info = c("none"), margins = c(10, 8),
na.color = "red", col = brewer.pal(11, "PuOr"),
main = cluster,
labRow=as.character(cluster_data$Gene), labCol=colnames_edit, colRow=cluster_data$color2,RowSideColors=cluster_data$color)
labRow = as.character(cluster_data$Gene), labCol = colnames_edit, colRow = cluster_data$color2, RowSideColors = cluster_data$color
)
dev.off()
}
}