Add new configuration file
This commit is contained in:
121
workflow/.old/apps/shell/submodules/parse_study_info
Normal file
121
workflow/.old/apps/shell/submodules/parse_study_info
Normal file
@@ -0,0 +1,121 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# @description Creates, modifies, and parses the study info file
#
# If $STUDY_INFO_FILE does not exist, a default file (header plus one entry)
# is created. Unless YES is set, the user may then append suggested entries or
# open the file in an editor. Finally every data row is loaded into STUDIES and
# any missing exp<N> directory under $STUDY_RESULTS_DIR is created.
#
# The body only runs once (guarded by SET_STUDIES); later calls return 0
# immediately.
#
# TODO
#
# * Needs refactoring
# * Ended up combining a few functions into one
#
# Uses helpers defined elsewhere: debug, err, execute
#
# @exitcode 0 If one or more studies found
# @exitcode 1 If no studies found
# @set STUDIES array contains array of "exp# sd ExpDir"
parse_study_info() {
  debug "Running: ${FUNCNAME[0]}"

  # Only run this once per project
  # in case we run multiple modules
  (( SET_STUDIES )) && return 0
  declare -g SET_STUDIES=1

  # Scratch variables are local so they cannot clobber names used by callers
  local initials empty_study next_study_entry largest response
  local col1 num sd dir study
  local -a studies_nums=()

  # Use initials from project or whoami?
  # Best I can do is first two letters of username
  # See TODO in markdown
  initials="${USER:0:2}"
  INITIALS=${initials^^}

  # Find an Exp directory that does not exist
  empty_study=1
  while [[ -d $STUDY_RESULTS_DIR/exp$empty_study ]]; do
    empty_study=$((empty_study + 1))
  done

  echo "${underline}Study Info File${nounderline}"

  if [[ -f $STUDY_INFO_FILE ]]; then
    # Get latest entry
    while IFS=',' read -r col1 _; do # split on comma, get Exp # from 1st column
      # Skip blank or non-numeric rows so the arithmetic below cannot fail
      [[ $col1 =~ ^[0-9]+$ ]] && studies_nums+=("$col1")
    done < <(tail -n +2 "$STUDY_INFO_FILE")
    largest=0
    for num in "${studies_nums[@]}"; do
      # 10# forces base 10 so an entry such as "08" is not parsed as octal
      if (( 10#$num > largest )); then
        largest=$((10#$num))
      fi
    done
    empty_study=$((largest + 1))
  else # create a default study info file
    echo "ExpNumb,ExpLabel,BackgroundSD,ZscoreJoinSD,AnalysisBy" > "$STUDY_INFO_FILE"
    echo "$empty_study,$PROJECT_NAME,NA,NA,$INITIALS" >> "$STUDY_INFO_FILE"
    empty_study=$((empty_study + 1))
  fi

  # empty_study always holds the number of the next entry to suggest
  next_study_entry="$empty_study,$PROJECT_NAME,NA,NA,$INITIALS"

  # Print current studies
  # (heredoc bodies sit at column 0 so they do not depend on tab indentation)
  cat <<-EOF
* Give each experiment labels to be used for the plots and specific files.
* Enter the desired experiment names in the order they should appear in the REMc heatmaps

Current study info file contents:

${underline}$STUDY_INFO_FILE${nounderline}
$(cat "$STUDY_INFO_FILE")

EOF

  # Allow user to add/edit the study info file
  if ! ((YES)); then
    # Keep prompting after an add or an invalid answer; stop on edit,
    # continue, or an empty answer (which also covers EOF on stdin).
    while true; do
      cat <<-EOF
Next entry suggestion: "$next_study_entry"

Would you like to:
* (a)dd the suggested entry
* (e)dit the study info file manually
* (c)ontinue (default)
EOF
      read -r -p "(c): " response
      echo ""
      [[ -z $response ]] && break
      case $response in
        a)
          echo "Adding auto-entry suggestion to $STUDY_INFO_FILE"
          echo "$next_study_entry" >> "$STUDY_INFO_FILE"
          # Advance the counter so the next suggestion is not a duplicate
          empty_study=$((empty_study + 1))
          next_study_entry="$empty_study,$PROJECT_NAME,NA,NA,$INITIALS"
          ;;
        e)
          # Fall back to vi when EDITOR is unset instead of running ""
          debug "${EDITOR:-vi} $STUDY_INFO_FILE"
          "${EDITOR:-vi}" "$STUDY_INFO_FILE"
          break
          ;;
        c)
          break
          ;;
        *)
          err "Invalid response, please try again"
          ;;
      esac
    done
  fi

  # Read study info file
  declare -ga STUDIES
  while IFS=',' read -r num _ sd _; do
    # A blank line would otherwise yield an entry pointing at ".../exp"
    [[ -n $num ]] || continue
    STUDIES+=("$num $sd $STUDY_RESULTS_DIR/exp$num")
  done < <(tail -n +2 "$STUDY_INFO_FILE") # skip header

  # Initialize missing Exp dirs
  for study in "${STUDIES[@]}"; do
    read -r _ _ dir <<< "$study"
    [[ -d $dir ]] || execute mkdir "$dir"
  done

  ((DEBUG)) && declare -p STUDIES

  # Return true if at least one study was found
  [[ ${#STUDIES[@]} -gt 0 ]]
}
|
||||||
125
workflow/apps/python/join_interactions.py
Normal file
125
workflow/apps/python/join_interactions.py
Normal file
@@ -0,0 +1,125 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
# Function to parse and set arguments
|
||||||
|
def parse_arguments(argv=None):
    """Parse the command line into a settings dictionary.

    Positional arguments, after the program name:
        1. output directory
        2. SD threshold (converted to float)
        3. study info CSV file
        4+. experiment input directories

    When only the program name is present ("interactive mode") the built-in
    developer defaults below are used instead.

    Args:
        argv: Full argument vector including the program name. Defaults to
            ``sys.argv`` so existing callers are unaffected.

    Returns:
        dict with the keys ``out_dir`` (absolute path), ``sd`` (float),
        ``study_info`` (absolute path) and ``input_dirs`` (list, as given).

    Raises:
        SystemExit: if arguments were supplied but fewer than three of them.
        ValueError: if the SD threshold is not a number.
    """
    argv = sys.argv if argv is None else argv

    if len(argv) == 1:  # Interactive mode
        args = [
            "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD",
            2,
            "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/StudyInfo.csv",
            "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/Exp1",
            "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/Exp2",
        ]
    else:
        args = list(argv[1:])

    # Fail with a readable message instead of a bare IndexError below.
    if len(args) < 3:
        sys.exit("Usage: join_interactions.py OUT_DIR SD STUDY_INFO_CSV [EXP_DIR ...]")

    return {
        "out_dir": os.path.abspath(args[0]),
        "sd": float(args[1]),
        "study_info": os.path.abspath(args[2]),
        "input_dirs": args[3:],
    }
|
||||||
|
|
||||||
|
args = parse_arguments()
|
||||||
|
|
||||||
|
# Create an array for the zscores files
|
||||||
|
def get_zscores_files(dirs):
    """Return the z-score interaction file of every directory that has one.

    For each entry of ``dirs`` the path
    ``<dir>/zscores/zscores_interaction.csv`` is built; it is included in the
    result, in input order, only when that path exists.
    """
    found = []
    for exp_dir in dirs:
        candidate = os.path.join(exp_dir, "zscores", "zscores_interaction.csv")
        if os.path.exists(candidate):
            found.append(candidate)
    return found
|
||||||
|
|
||||||
|
zscores_files = get_zscores_files(args['input_dirs'])
|
||||||
|
print(f"The SD value is: {args['sd']}")
|
||||||
|
|
||||||
|
# Ensure there are enough files to compare
|
||||||
|
if len(zscores_files) < 2:
|
||||||
|
sys.exit("Not enough experiments to compare, exiting script")
|
||||||
|
|
||||||
|
# Function to join zscores files
|
||||||
|
def join_zscores_files(files):
    """Read every CSV in ``files`` and outer-join them on ``OrfRep``.

    The first file seeds the result; each following file is merged onto it in
    order, keeping rows that appear in either side.

    NOTE(review): no ``suffixes`` are passed, so overlapping column names get
    the pandas defaults rather than the ".1"/".2" style names that
    ``clean_headers`` and ``relabel_headers`` look for - confirm which naming
    is intended.
    """
    merged = pd.read_csv(files[0])
    for path in files[1:]:
        merged = merged.merge(pd.read_csv(path), on="OrfRep", how="outer")
    return merged
|
||||||
|
|
||||||
|
# Load and join zscores files
|
||||||
|
joined_data = join_zscores_files(zscores_files)
|
||||||
|
|
||||||
|
# Order and select columns
|
||||||
|
def order_and_select_columns(data):
    """Sort the columns by name and keep only the identifier and z-score ones.

    A column is kept when its name contains any of ``OrfRep``, ``Gene``,
    ``z_lm_k``, ``z_shift_k``, ``z_lm_l`` or ``z_shift_l`` (regex search, so
    the match is case-sensitive and may occur anywhere in the name).
    """
    wanted = "OrfRep|Gene|z_lm_k|z_shift_k|z_lm_l|z_shift_l"
    alphabetical = sorted(data.columns)
    return data[alphabetical].filter(regex=wanted)
|
||||||
|
|
||||||
|
selected_headers = order_and_select_columns(joined_data)
|
||||||
|
|
||||||
|
# Remove redundant columns like "Gene.1"
|
||||||
|
def clean_headers(data, suffixes):
    """Drop the duplicate gene columns ``Gene.1`` .. ``Gene.<suffixes>``.

    ``suffixes`` is the highest numeric suffix to remove. Names that are not
    present are skipped silently, and a new frame is returned.
    """
    redundant = [f"Gene.{n + 1}" for n in range(suffixes)]
    return data.drop(columns=redundant, errors="ignore")
|
||||||
|
|
||||||
|
headSel = clean_headers(selected_headers, len(zscores_files) - 1)
|
||||||
|
headSel2 = clean_headers(joined_data.filter(regex="OrfRep|Gene"), len(zscores_files) - 1)
|
||||||
|
|
||||||
|
# Fill NA values in Shift and Z_lm columns
|
||||||
|
def fill_na_in_columns(data):
    """Replace missing values in the shift and z_lm columns with placeholders.

    Columns whose name contains "shift" get 0.001 and, failing that, columns
    whose name contains "z_lm_" get 0.0001. The name test ignores case because
    the rest of this script selects these columns by lowercase names
    (``z_shift_*`` / ``z_lm_*``), which the previous "Shift" / "Z_lm_" tests
    could never match.

    The frame is updated in place and also returned, as before. Each column is
    assigned back explicitly: ``data[col].fillna(..., inplace=True)`` acts on
    a temporary Series and does not write through under pandas copy-on-write.

    Args:
        data: DataFrame to fill.

    Returns:
        The same DataFrame object, with the fills applied.
    """
    for column in data.columns:
        name = str(column).lower()
        if "shift" in name:
            data[column] = data[column].fillna(0.001)
        elif "z_lm_" in name:
            data[column] = data[column].fillna(0.0001)
    return data
|
||||||
|
|
||||||
|
headSel = fill_na_in_columns(headSel)
|
||||||
|
|
||||||
|
# Filter based on standard deviation
|
||||||
|
def filter_by_sd(data, sd):
    """Keep the rows whose z_lm value reaches the threshold in any column.

    A row is kept when the absolute value of at least one column matching
    ``z_lm_`` is greater than or equal to ``sd``. A threshold of 0 disables
    the filter and returns ``data`` itself.

    NOTE(review): when ``data`` holds no ``z_lm_`` column there is nothing to
    test, so a non-zero threshold appears to drop every row - confirm callers
    always pass the z_lm columns.
    """
    if sd != 0:
        keep = data.filter(regex="z_lm_").abs().ge(sd).any(axis=1)
        return data[keep]
    return data
|
||||||
|
|
||||||
|
# Pick the rows once, on the frame that still holds the z_lm_ columns, and
# split the columns afterwards. filter_by_sd only inspects z_lm_ columns, so
# filtering the z_shift-only frame on its own left it nothing to test and
# dropped every row whenever sd was non-zero.
sd_filtered = filter_by_sd(headSel, args['sd'])
REMcRdy = sd_filtered.filter(regex="OrfRep|Gene|z_lm_")
shiftOnly = sd_filtered.filter(regex="OrfRep|Gene|z_shift")
|
||||||
|
|
||||||
|
# Reorder columns to interleave Z_lm and Shift data
|
||||||
|
def reorder_columns(data1, data2):
    """Interleave the columns of ``data2`` into a copy of ``data1``.

    The first two columns of ``data1`` are left alone. For every later column
    position ``i`` of ``data1``, column ``i`` of ``data2`` is inserted at
    position ``2 * i - 1`` of the copy, which places it directly after the
    corresponding ``data1`` column.

    NOTE(review): with a two-column ``data1`` the loop never runs and the
    result is a plain copy; the visible call passes ``headSel2``, which looks
    like it has only identifier columns - confirm the intended arguments.
    """
    combined = data1.copy()
    col = 2
    while col < data1.shape[1]:
        combined.insert(2 * col - 1, data2.columns[col], data2.iloc[:, col])
        col += 1
    return combined
|
||||||
|
|
||||||
|
combI = reorder_columns(headSel2, shiftOnly)
|
||||||
|
|
||||||
|
# Write output files
|
||||||
|
# quotechar must be a one-character string; the former quotechar=False made
# the csv writer raise TypeError. With the default quoting a field is quoted
# only when it contains a comma, quote or newline, so plain values stay bare.
REMcRdy.to_csv(os.path.join(args['out_dir'], "REMcRdy_lm_only.csv"), index=False)
shiftOnly.to_csv(os.path.join(args['out_dir'], "Shift_only.csv"), index=False)
|
||||||
|
|
||||||
|
# Relabel headers using experiment names from StudyInfo.csv
|
||||||
|
def relabel_headers(headers, labels):
    """Replace a trailing ".N" experiment suffix with that experiment's label.

    A header ending in ".N", where N is a number from 1 to the number of rows
    in ``labels``, becomes "<stem>_<label>", the label being taken from row
    N - 1, second column, of ``labels``. All other headers are returned
    unchanged.

    Compared with the previous version:
      * the result is built as a new list; a pandas Index does not support
        item assignment, so the old in-place update raised TypeError as soon
        as one header matched;
      * the accepted suffix range follows the size of ``labels`` rather than
        a hard-coded 1..3, which also avoids an IndexError when fewer than
        three experiments are listed;
      * only the trailing suffix is rewritten, not every ".N" in the name.

    Args:
        headers: Iterable of column names (for example ``DataFrame.columns``).
        labels: DataFrame whose second column holds the experiment labels.

    Returns:
        list of header strings, suitable for assigning to ``DataFrame.columns``.
    """
    label_count = len(labels)
    relabeled = []
    for header in headers:
        stem, dot, suffix = str(header).rpartition(".")
        if dot and suffix.isdecimal() and 1 <= int(suffix) <= label_count:
            exp_name = labels.iloc[int(suffix) - 1, 1]
            relabeled.append(f"{stem}_{exp_name}")
        else:
            relabeled.append(header)
    return relabeled
|
||||||
|
|
||||||
|
LabelStd = pd.read_csv(args['study_info'])

shiftOnly.columns = relabel_headers(shiftOnly.columns, LabelStd)
REMcRdy.columns = relabel_headers(REMcRdy.columns, LabelStd)

# Save relabeled files
# quotechar must be a one-character string; the former quotechar=False made
# the csv writer raise TypeError, so the argument is dropped.
REMcRdy.to_csv(os.path.join(args['out_dir'], "REMcRdy_lm_only.csv"), index=False)
shiftOnly.to_csv(os.path.join(args['out_dir'], "Shift_only.csv"), index=False)

# Save updated parameters
# Column 3 is the fourth column of the study info file.
LabelStd.iloc[:, 3] = args['sd']
# read_csv turns the "NA" placeholders into missing values; na_rep writes them
# back as "NA" so the study info file does not end up with empty fields.
LabelStd.to_csv(os.path.join(args['out_dir'], "parameters.csv"), index=False, na_rep="NA")
LabelStd.to_csv(args['study_info'], index=False, na_rep="NA")
|
||||||
@@ -1,12 +1,4 @@
|
|||||||
#!/usr/bin/env Rscript
|
|
||||||
# This R script performs GTA L and K Pairwise Compares for user specified pairs of Experiments
|
# This R script performs GTA L and K Pairwise Compares for user specified pairs of Experiments
|
||||||
#
|
|
||||||
# Updated 240724 Bryan C Roessler to improve file operations and portability
|
|
||||||
# NOTE: The two required arguments are the same and now there are two optional arguments
|
|
||||||
# 1. Exp1
|
|
||||||
# 2. Exp2
|
|
||||||
# 3. StudyInfo.csv file
|
|
||||||
# 4. Output Directory
|
|
||||||
|
|
||||||
library("ggplot2")
|
library("ggplot2")
|
||||||
library("plotly")
|
library("plotly")
|
||||||
@@ -16,31 +8,14 @@ library("grid")
|
|||||||
library("ggthemes")
|
library("ggthemes")
|
||||||
|
|
||||||
args <- commandArgs(TRUE)
|
args <- commandArgs(TRUE)
|
||||||
exp_name <- args[1]
|
exp1_name <- args[1]
|
||||||
exp_name2 <- args[2]
|
exp1_file <- args[2]
|
||||||
|
exp2_name <- args[3]
|
||||||
|
exp2_file <- args[4]
|
||||||
|
output_dir <- args[5]
|
||||||
|
|
||||||
if (length(args) >= 3) {
|
pairDirL <- file.path(output_dir, paste0("PairwiseCompareL_", exp1_name, "-", exp2_name))
|
||||||
study_info_file <- args[3]
|
pairDirK <- file.path(output_dir, paste0("PairwiseCompareK_", exp1_name, "-", exp2_name))
|
||||||
} else {
|
|
||||||
study_info_file <- "StudyInfo.csv"
|
|
||||||
}
|
|
||||||
|
|
||||||
if (length(args) >= 4) {
|
|
||||||
output_dir <- args[4]
|
|
||||||
} else {
|
|
||||||
output_dir <- "gta"
|
|
||||||
}
|
|
||||||
|
|
||||||
expNumber1 <- as.numeric(sub("^.*?(\\d+)$", "\\1", exp_name))
|
|
||||||
expNumber2 <- as.numeric(sub("^.*?(\\d+)$", "\\1", exp_name2))
|
|
||||||
Labels <- read.csv(file = study_info_file, stringsAsFactors = FALSE)
|
|
||||||
Name1 <- Labels[expNumber1, 2]
|
|
||||||
Name2 <- Labels[expNumber2, 2]
|
|
||||||
go_terms_file <- "Average_GOTerms_All.csv"
|
|
||||||
input_file1 <- file.path(output_dir, exp_name, go_terms_file)
|
|
||||||
input_file2 <- file.path(output_dir, exp_name2, go_terms_file)
|
|
||||||
pairDirL <- file.path(output_dir, paste0("PairwiseCompareL_", exp_name, "-", exp_name2))
|
|
||||||
pairDirK <- file.path(output_dir, paste0("PairwiseCompareK_", exp_name, "-", exp_name2))
|
|
||||||
|
|
||||||
# Pairwise L
|
# Pairwise L
|
||||||
# outputPlotly <- "../GTAresults/PairwiseCompareL/" #"/GTAresults/PairwiseCompareL/"
|
# outputPlotly <- "../GTAresults/PairwiseCompareL/" #"/GTAresults/PairwiseCompareL/"
|
||||||
@@ -75,21 +50,6 @@ theme_Publication <- function(base_size = 14, base_family = "sans") {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
scale_fill_Publication <- function(...) {
|
|
||||||
library(scales)
|
|
||||||
discrete_scale("fill", "Publication", manual_pal(
|
|
||||||
values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
|
|
||||||
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
scale_colour_Publication <- function(...) {
|
|
||||||
discrete_scale("colour", "Publication", manual_pal(
|
|
||||||
values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
|
|
||||||
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
theme_Publication_legend_right <- function(base_size = 14, base_family = "sans") {
|
theme_Publication_legend_right <- function(base_size = 14, base_family = "sans") {
|
||||||
(theme_foundation(base_size = base_size, base_family = base_family) +
|
(theme_foundation(base_size = base_size, base_family = base_family) +
|
||||||
theme(
|
theme(
|
||||||
@@ -131,8 +91,8 @@ scale_colour_Publication <- function(...) {
|
|||||||
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...)
|
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...)
|
||||||
}
|
}
|
||||||
|
|
||||||
X1 <- read.csv(file = input_file1, stringsAsFactors = FALSE, header = TRUE)
|
X1 <- read.csv(file = exp1_file, stringsAsFactors = FALSE, header = TRUE)
|
||||||
X2 <- read.csv(file = input_file2, stringsAsFactors = FALSE, header = TRUE)
|
X2 <- read.csv(file = exp2_file, stringsAsFactors = FALSE, header = TRUE)
|
||||||
|
|
||||||
X <- merge(X1, X2, by = "Term_Avg", all = TRUE, suffixes = c("_X1", "_X2"))
|
X <- merge(X1, X2, by = "Term_Avg", all = TRUE, suffixes = c("_X1", "_X2"))
|
||||||
gg <- ggplot(data = X, aes(
|
gg <- ggplot(data = X, aes(
|
||||||
@@ -146,7 +106,7 @@ gg <- ggplot(data = X, aes(
|
|||||||
SD_1 = Z_lm_L_SD_X1,
|
SD_1 = Z_lm_L_SD_X1,
|
||||||
SD_2 = Z_lm_L_SD_X2
|
SD_2 = Z_lm_L_SD_X2
|
||||||
)) +
|
)) +
|
||||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||||
geom_rect(
|
geom_rect(
|
||||||
aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2),
|
aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2),
|
||||||
color = "grey20",
|
color = "grey20",
|
||||||
@@ -156,12 +116,12 @@ gg <- ggplot(data = X, aes(
|
|||||||
fill = NA
|
fill = NA
|
||||||
) +
|
) +
|
||||||
geom_point(shape = 3) +
|
geom_point(shape = 3) +
|
||||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, " vs. ", Name2) +
|
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, " vs. ", exp2_name)) +
|
||||||
theme_Publication_legend_right()
|
theme_Publication_legend_right()
|
||||||
|
|
||||||
pdf(
|
pdf(
|
||||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOntology.pdf")),
|
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOntology.pdf")),
|
||||||
width = 12,
|
width = 12,
|
||||||
height = 8
|
height = 8
|
||||||
)
|
)
|
||||||
@@ -169,7 +129,7 @@ pdf(
|
|||||||
gg
|
gg
|
||||||
dev.off()
|
dev.off()
|
||||||
pgg <- ggplotly(gg)
|
pgg <- ggplotly(gg)
|
||||||
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOntology.html"))
|
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOntology.html"))
|
||||||
htmlwidgets::saveWidget(pgg, fname)
|
htmlwidgets::saveWidget(pgg, fname)
|
||||||
|
|
||||||
# ID aggravators and alleviators, regardless of whether they meet 2SD threshold
|
# ID aggravators and alleviators, regardless of whether they meet 2SD threshold
|
||||||
@@ -184,19 +144,19 @@ X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm
|
|||||||
X$Overlap_Avg <- NA
|
X$Overlap_Avg <- NA
|
||||||
|
|
||||||
try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <-
|
try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <-
|
||||||
paste(Name1, "Specific_Deletion_Enhancers", sep = "_"))
|
paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"))
|
||||||
try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <-
|
try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <-
|
||||||
paste(Name1, "Specific_Deletion_Suppresors", sep = "_"))
|
paste(exp1_name, "Specific_Deletion_Suppresors", sep = "_"))
|
||||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <-
|
try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <-
|
||||||
paste(Name2, "Specific_Deletion_Enhancers", sep = "_"))
|
paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"))
|
||||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <-
|
try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <-
|
||||||
paste(Name2, "Specific_Deletion_Suppressors", sep = "_"))
|
paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"))
|
||||||
try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg, ]$Overlap_Avg <- "Overlapping_Deletion_Enhancers")
|
try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg, ]$Overlap_Avg <- "Overlapping_Deletion_Enhancers")
|
||||||
try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg, ]$Overlap_Avg <- "Overlapping_Deletion_Suppressors")
|
try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg, ]$Overlap_Avg <- "Overlapping_Deletion_Suppressors")
|
||||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg, ]$Overlap_Avg <-
|
try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg, ]$Overlap_Avg <-
|
||||||
paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_"))
|
paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"))
|
||||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg, ]$Overlap_Avg <-
|
try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg, ]$Overlap_Avg <-
|
||||||
paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_"))
|
paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_"))
|
||||||
|
|
||||||
gg <- ggplot(data = X, aes(
|
gg <- ggplot(data = X, aes(
|
||||||
x = Z_lm_L_Avg_X1,
|
x = Z_lm_L_Avg_X1,
|
||||||
@@ -209,7 +169,7 @@ gg <- ggplot(data = X, aes(
|
|||||||
SD_1 = Z_lm_L_SD_X1,
|
SD_1 = Z_lm_L_SD_X1,
|
||||||
SD_2 = Z_lm_L_SD_X2
|
SD_2 = Z_lm_L_SD_X2
|
||||||
)) +
|
)) +
|
||||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||||
geom_rect(
|
geom_rect(
|
||||||
aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2),
|
aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2),
|
||||||
color = "grey20",
|
color = "grey20",
|
||||||
@@ -219,12 +179,12 @@ gg <- ggplot(data = X, aes(
|
|||||||
fill = NA
|
fill = NA
|
||||||
) +
|
) +
|
||||||
geom_point(shape = 3) +
|
geom_point(shape = 3) +
|
||||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, " vs. ", Name2)) +
|
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, " vs. ", exp2_name)) +
|
||||||
theme_Publication_legend_right()
|
theme_Publication_legend_right()
|
||||||
|
|
||||||
pdf(
|
pdf(
|
||||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap.pdf")),
|
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap.pdf")),
|
||||||
width = 12,
|
width = 12,
|
||||||
height = 8
|
height = 8
|
||||||
)
|
)
|
||||||
@@ -232,7 +192,7 @@ pdf(
|
|||||||
gg
|
gg
|
||||||
dev.off()
|
dev.off()
|
||||||
pgg <- ggplotly(gg)
|
pgg <- ggplotly(gg)
|
||||||
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOverlap.html"))
|
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap.html"))
|
||||||
htmlwidgets::saveWidget(pgg, fname)
|
htmlwidgets::saveWidget(pgg, fname)
|
||||||
|
|
||||||
x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2, ]
|
x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2, ]
|
||||||
@@ -249,15 +209,15 @@ gg <- ggplot(data = x_rem2_gene, aes(
|
|||||||
SD_1 = Z_lm_L_SD_X1,
|
SD_1 = Z_lm_L_SD_X1,
|
||||||
SD_2 = Z_lm_L_SD_X2
|
SD_2 = Z_lm_L_SD_X2
|
||||||
)) +
|
)) +
|
||||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||||
geom_point(shape = 3) +
|
geom_point(shape = 3) +
|
||||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
|
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
|
||||||
theme_Publication_legend_right()
|
theme_Publication_legend_right()
|
||||||
|
|
||||||
pdf(
|
pdf(
|
||||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_above2genes.pdf")),
|
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_above2genes.pdf")),
|
||||||
width = 12,
|
width = 12,
|
||||||
height = 8
|
height = 8
|
||||||
)
|
)
|
||||||
@@ -266,7 +226,7 @@ gg
|
|||||||
dev.off()
|
dev.off()
|
||||||
pgg <- ggplotly(gg)
|
pgg <- ggplotly(gg)
|
||||||
#pgg
|
#pgg
|
||||||
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOverlap_above2genes.html"))
|
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap_above2genes.html"))
|
||||||
htmlwidgets::saveWidget(pgg, fname)
|
htmlwidgets::saveWidget(pgg, fname)
|
||||||
|
|
||||||
#4
|
#4
|
||||||
@@ -282,15 +242,15 @@ gg <- ggplot(data = X_overlap_nothresold, aes(
|
|||||||
SD_1 = Z_lm_L_SD_X1,
|
SD_1 = Z_lm_L_SD_X1,
|
||||||
SD_2 = Z_lm_L_SD_X2
|
SD_2 = Z_lm_L_SD_X2
|
||||||
)) +
|
)) +
|
||||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||||
geom_point(shape = 3) +
|
geom_point(shape = 3) +
|
||||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
|
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
|
||||||
theme_Publication_legend_right()
|
theme_Publication_legend_right()
|
||||||
|
|
||||||
pdf(
|
pdf(
|
||||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_Above2SD_ByOverlap.pdf")),
|
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_Above2SD_ByOverlap.pdf")),
|
||||||
width = 12,
|
width = 12,
|
||||||
height = 8
|
height = 8
|
||||||
)
|
)
|
||||||
@@ -299,7 +259,7 @@ gg
|
|||||||
dev.off()
|
dev.off()
|
||||||
pgg <- ggplotly(gg)
|
pgg <- ggplotly(gg)
|
||||||
#pgg
|
#pgg
|
||||||
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_Above2SD_ByOverlap.html"))
|
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_Above2SD_ByOverlap.html"))
|
||||||
htmlwidgets::saveWidget(pgg, fname)
|
htmlwidgets::saveWidget(pgg, fname)
|
||||||
|
|
||||||
# Only output GTA terms where average score is still above 2 after subtracting the SD
|
# Only output GTA terms where average score is still above 2 after subtracting the SD
|
||||||
@@ -320,16 +280,16 @@ X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 &
|
|||||||
X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2), ]
|
X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2), ]
|
||||||
X$Overlap <- NA
|
X$Overlap <- NA
|
||||||
|
|
||||||
try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg, ]$Overlap <- paste(Name1, "Specific_Deletion_Enhancers", sep = "_"))
|
try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg, ]$Overlap <- paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"))
|
||||||
try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg, ]$Overlap <- paste(Name1, "Specific_Deletion_Suppresors", sep = "_"))
|
try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg, ]$Overlap <- paste(exp1_name, "Specific_Deletion_Suppresors", sep = "_"))
|
||||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg, ]$Overlap <- paste(Name2, "Specific_Deletion_Enhancers", sep = "_"))
|
try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg, ]$Overlap <- paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"))
|
||||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg, ]$Overlap <- paste(Name2, "Specific_Deletion_Suppressors", sep = "_"))
|
try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg, ]$Overlap <- paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"))
|
||||||
try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg, ]$Overlap <- "Overlapping_Deletion_Enhancers")
|
try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg, ]$Overlap <- "Overlapping_Deletion_Enhancers")
|
||||||
try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg, ]$Overlap <- "Overlapping_Deletion_Suppressors")
|
try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg, ]$Overlap <- "Overlapping_Deletion_Suppressors")
|
||||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg, ]$Overlap <-
|
try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg, ]$Overlap <-
|
||||||
paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_"))
|
paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"))
|
||||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg, ]$Overlap <-
|
try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg, ]$Overlap <-
|
||||||
paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_"))
|
paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_"))
|
||||||
|
|
||||||
#5
|
#5
|
||||||
X_abovethreshold <- X[!(is.na(X$Overlap)), ]
|
X_abovethreshold <- X[!(is.na(X$Overlap)), ]
|
||||||
@@ -345,15 +305,15 @@ gg <- ggplot(data = X_abovethreshold, aes(
|
|||||||
SD_1 = Z_lm_L_SD_X1,
|
SD_1 = Z_lm_L_SD_X1,
|
||||||
SD_2 = Z_lm_L_SD_X2
|
SD_2 = Z_lm_L_SD_X2
|
||||||
)) +
|
)) +
|
||||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||||
geom_point(shape = 3) +
|
geom_point(shape = 3) +
|
||||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, " vs. ", Name2)) +
|
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, " vs. ", exp2_name)) +
|
||||||
theme_Publication_legend_right()
|
theme_Publication_legend_right()
|
||||||
|
|
||||||
pdf(
|
pdf(
|
||||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.pdf")),
|
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold.pdf")),
|
||||||
width = 12,
|
width = 12,
|
||||||
height = 8
|
height = 8
|
||||||
)
|
)
|
||||||
@@ -362,7 +322,7 @@ gg
|
|||||||
dev.off()
|
dev.off()
|
||||||
pgg <- ggplotly(gg)
|
pgg <- ggplotly(gg)
|
||||||
#pgg
|
#pgg
|
||||||
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.html"))
|
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold.html"))
|
||||||
htmlwidgets::saveWidget(pgg, fname)
|
htmlwidgets::saveWidget(pgg, fname)
|
||||||
|
|
||||||
#6
|
#6
|
||||||
@@ -377,16 +337,16 @@ gg <- ggplot(data = X_abovethreshold, aes(
|
|||||||
SD_1 = Z_lm_L_SD_X1,
|
SD_1 = Z_lm_L_SD_X1,
|
||||||
SD_2 = Z_lm_L_SD_X2
|
SD_2 = Z_lm_L_SD_X2
|
||||||
)) +
|
)) +
|
||||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||||
geom_text(aes(label = Term_Avg), nudge_y = 0.25, size = 2) +
|
geom_text(aes(label = Term_Avg), nudge_y = 0.25, size = 2) +
|
||||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||||
geom_point(shape = 3, size = 3) +
|
geom_point(shape = 3, size = 3) +
|
||||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
|
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
|
||||||
theme_Publication_legend_right()
|
theme_Publication_legend_right()
|
||||||
|
|
||||||
pdf(
|
pdf(
|
||||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_names.pdf")),
|
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_names.pdf")),
|
||||||
width = 20,
|
width = 20,
|
||||||
height = 20
|
height = 20
|
||||||
)
|
)
|
||||||
@@ -395,7 +355,7 @@ gg
|
|||||||
dev.off()
|
dev.off()
|
||||||
pgg <- ggplotly(gg)
|
pgg <- ggplotly(gg)
|
||||||
#pgg
|
#pgg
|
||||||
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_names.html"))
|
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_names.html"))
|
||||||
htmlwidgets::saveWidget(pgg, fname)
|
htmlwidgets::saveWidget(pgg, fname)
|
||||||
|
|
||||||
X_abovethreshold$X1_Rank <- NA
|
X_abovethreshold$X1_Rank <- NA
|
||||||
@@ -415,16 +375,16 @@ gg <- ggplot(data = X_abovethreshold, aes(
|
|||||||
SD_1 = Z_lm_L_SD_X1,
|
SD_1 = Z_lm_L_SD_X1,
|
||||||
SD_2 = Z_lm_L_SD_X2
|
SD_2 = Z_lm_L_SD_X2
|
||||||
)) +
|
)) +
|
||||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||||
geom_text(aes(label = X1_Rank), nudge_y = 0.25, size = 4) +
|
geom_text(aes(label = X1_Rank), nudge_y = 0.25, size = 4) +
|
||||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||||
geom_point(shape = 3, size = 3) +
|
geom_point(shape = 3, size = 3) +
|
||||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
|
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
|
||||||
theme_Publication_legend_right()
|
theme_Publication_legend_right()
|
||||||
|
|
||||||
pdf(
|
pdf(
|
||||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX1.pdf")),
|
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX1.pdf")),
|
||||||
width = 15,
|
width = 15,
|
||||||
height = 15
|
height = 15
|
||||||
)
|
)
|
||||||
@@ -435,7 +395,7 @@ pgg <- ggplotly(gg)
|
|||||||
#pgg
|
#pgg
|
||||||
|
|
||||||
fname <-
|
fname <-
|
||||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX1.html"))
|
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX1.html"))
|
||||||
htmlwidgets::saveWidget(pgg, fname)
|
htmlwidgets::saveWidget(pgg, fname)
|
||||||
|
|
||||||
#8
|
#8
|
||||||
@@ -450,16 +410,16 @@ gg <- ggplot(data = X_abovethreshold, aes(
|
|||||||
SD_1 = Z_lm_L_SD_X1,
|
SD_1 = Z_lm_L_SD_X1,
|
||||||
SD_2 = Z_lm_L_SD_X2
|
SD_2 = Z_lm_L_SD_X2
|
||||||
)) +
|
)) +
|
||||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||||
geom_text(aes(label = X2_Rank), nudge_y = 0.25, size = 4) +
|
geom_text(aes(label = X2_Rank), nudge_y = 0.25, size = 4) +
|
||||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||||
geom_point(shape = 3, size = 3) +
|
geom_point(shape = 3, size = 3) +
|
||||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
|
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
|
||||||
theme_Publication_legend_right()
|
theme_Publication_legend_right()
|
||||||
|
|
||||||
pdf(
|
pdf(
|
||||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX2.pdf")),
|
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX2.pdf")),
|
||||||
width = 15,
|
width = 15,
|
||||||
height = 15
|
height = 15
|
||||||
)
|
)
|
||||||
@@ -469,18 +429,18 @@ dev.off()
|
|||||||
pgg <- ggplotly(gg)
|
pgg <- ggplotly(gg)
|
||||||
#pgg
|
#pgg
|
||||||
fname <-
|
fname <-
|
||||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX2.html"))
|
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX2.html"))
|
||||||
htmlwidgets::saveWidget(pgg, fname)
|
htmlwidgets::saveWidget(pgg, fname)
|
||||||
|
|
||||||
write.csv(
|
write.csv(
|
||||||
x = X,
|
x = X,
|
||||||
file.path(pairDirL, paste0("All_GTA_Avg_Scores_", Name1, "_vs_", Name2, ".csv")),
|
file.path(pairDirL, paste0("All_GTA_Avg_Scores_", exp1_name, "_vs_", exp2_name, ".csv")),
|
||||||
row.names = FALSE
|
row.names = FALSE
|
||||||
)
|
)
|
||||||
|
|
||||||
write.csv(
|
write.csv(
|
||||||
x = X_abovethreshold,
|
x = X_abovethreshold,
|
||||||
file = file.path(pairDirL, paste0("AboveThreshold_GTA_Avg_Scores_", Name1, "_vs_", Name2, ".csv")),
|
file = file.path(pairDirL, paste0("AboveThreshold_GTA_Avg_Scores_", exp1_name, "_vs_", exp2_name, ".csv")),
|
||||||
row.names = FALSE
|
row.names = FALSE
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -582,8 +542,8 @@ scale_colour_Publication <- function(...) {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
X1 <- read.csv(file = input_file1, stringsAsFactors = FALSE, header = TRUE)
|
X1 <- read.csv(file = exp1_file, stringsAsFactors = FALSE, header = TRUE)
|
||||||
X2 <- read.csv(file = input_file2, stringsAsFactors = FALSE, header = TRUE)
|
X2 <- read.csv(file = exp2_file, stringsAsFactors = FALSE, header = TRUE)
|
||||||
|
|
||||||
#1
|
#1
|
||||||
X <- merge(X1, X2, by = "Term_Avg", all = TRUE, suffixes = c("_X1", "_X2"))
|
X <- merge(X1, X2, by = "Term_Avg", all = TRUE, suffixes = c("_X1", "_X2"))
|
||||||
@@ -599,15 +559,15 @@ gg <- ggplot(data = X, aes(
|
|||||||
SD_1 = Z_lm_K_SD_X1,
|
SD_1 = Z_lm_K_SD_X1,
|
||||||
SD_2 = Z_lm_K_SD_X2
|
SD_2 = Z_lm_K_SD_X2
|
||||||
)) +
|
)) +
|
||||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||||
geom_point(shape = 3) +
|
geom_point(shape = 3) +
|
||||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
|
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
|
||||||
theme_Publication_legend_right()
|
theme_Publication_legend_right()
|
||||||
|
|
||||||
pdf(
|
pdf(
|
||||||
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOntology.pdf")),
|
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOntology.pdf")),
|
||||||
width = 12,
|
width = 12,
|
||||||
height = 8
|
height = 8
|
||||||
)
|
)
|
||||||
@@ -616,7 +576,7 @@ gg
|
|||||||
dev.off()
|
dev.off()
|
||||||
pgg <- ggplotly(gg)
|
pgg <- ggplotly(gg)
|
||||||
#pgg
|
#pgg
|
||||||
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOntology.html"))
|
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOntology.html"))
|
||||||
htmlwidgets::saveWidget(pgg, fname)
|
htmlwidgets::saveWidget(pgg, fname)
|
||||||
|
|
||||||
#2
|
#2
|
||||||
@@ -632,23 +592,23 @@ X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm
|
|||||||
X$Overlap_Avg <- NA
|
X$Overlap_Avg <- NA
|
||||||
|
|
||||||
try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <-
|
try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <-
|
||||||
paste(Name1, "Specific_Deletion_Suppressors", sep = "_"))
|
paste(exp1_name, "Specific_Deletion_Suppressors", sep = "_"))
|
||||||
try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <-
|
try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <-
|
||||||
paste(Name1, "Specific_Deletion_Enhancers", sep = "_"))
|
paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"))
|
||||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <-
|
try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <-
|
||||||
paste(Name2, "Specific_Deletion_Suppressors", sep = "_"))
|
paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"))
|
||||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <-
|
try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <-
|
||||||
paste(Name2, "Specific_Deletion_Enhancers", sep = "_"))
|
paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"))
|
||||||
try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg, ]$Overlap_Avg <-
|
try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg, ]$Overlap_Avg <-
|
||||||
"Overlapping_Deletion_Suppressors")
|
"Overlapping_Deletion_Suppressors")
|
||||||
try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg, ]$Overlap_Avg <-
|
try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg, ]$Overlap_Avg <-
|
||||||
"Overlapping_Deletion_Enhancers")
|
"Overlapping_Deletion_Enhancers")
|
||||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg, ]$Overlap_Avg <-
|
try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg, ]$Overlap_Avg <-
|
||||||
paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_"))
|
paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_"))
|
||||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg, ]$Overlap_Avg <-
|
try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg, ]$Overlap_Avg <-
|
||||||
paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_"))
|
paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"))
|
||||||
|
|
||||||
plotly_path <- file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_byOverlap.html"))
|
plotly_path <- file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap.html"))
|
||||||
gg <- ggplot(data = X, aes(
|
gg <- ggplot(data = X, aes(
|
||||||
x = Z_lm_K_Avg_X1,
|
x = Z_lm_K_Avg_X1,
|
||||||
y = Z_lm_K_Avg_X2,
|
y = Z_lm_K_Avg_X2,
|
||||||
@@ -660,15 +620,15 @@ gg <- ggplot(data = X, aes(
|
|||||||
SD_1 = Z_lm_K_SD_X1,
|
SD_1 = Z_lm_K_SD_X1,
|
||||||
SD_2 = Z_lm_K_SD_X2
|
SD_2 = Z_lm_K_SD_X2
|
||||||
)) +
|
)) +
|
||||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||||
geom_point(shape = 3) +
|
geom_point(shape = 3) +
|
||||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
|
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
|
||||||
theme_Publication_legend_right()
|
theme_Publication_legend_right()
|
||||||
|
|
||||||
pdf(
|
pdf(
|
||||||
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap.pdf")),
|
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap.pdf")),
|
||||||
width = 12,
|
width = 12,
|
||||||
height = 8
|
height = 8
|
||||||
)
|
)
|
||||||
@@ -678,12 +638,12 @@ dev.off()
|
|||||||
pgg <- ggplotly(gg)
|
pgg <- ggplotly(gg)
|
||||||
|
|
||||||
#2
|
#2
|
||||||
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOverlap.html"))
|
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap.html"))
|
||||||
htmlwidgets::saveWidget(pgg, fname)
|
htmlwidgets::saveWidget(pgg, fname)
|
||||||
|
|
||||||
#3
|
#3
|
||||||
x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2, ]
|
x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2, ]
|
||||||
plotly_path <- file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_byOverlap_above2genes.html"))
|
plotly_path <- file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap_above2genes.html"))
|
||||||
gg <- ggplot(data = x_rem2_gene, aes(
|
gg <- ggplot(data = x_rem2_gene, aes(
|
||||||
x = Z_lm_K_Avg_X1,
|
x = Z_lm_K_Avg_X1,
|
||||||
y = Z_lm_K_Avg_X2,
|
y = Z_lm_K_Avg_X2,
|
||||||
@@ -695,15 +655,15 @@ gg <- ggplot(data = x_rem2_gene, aes(
|
|||||||
SD_1 = Z_lm_K_SD_X1,
|
SD_1 = Z_lm_K_SD_X1,
|
||||||
SD_2 = Z_lm_K_SD_X2
|
SD_2 = Z_lm_K_SD_X2
|
||||||
)) +
|
)) +
|
||||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||||
geom_point(shape = 3) +
|
geom_point(shape = 3) +
|
||||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
|
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
|
||||||
theme_Publication_legend_right()
|
theme_Publication_legend_right()
|
||||||
|
|
||||||
pdf(
|
pdf(
|
||||||
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_above2genes.pdf")),
|
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_above2genes.pdf")),
|
||||||
width = 12,
|
width = 12,
|
||||||
height = 8
|
height = 8
|
||||||
)
|
)
|
||||||
@@ -712,7 +672,7 @@ gg
|
|||||||
dev.off()
|
dev.off()
|
||||||
pgg <- ggplotly(gg)
|
pgg <- ggplotly(gg)
|
||||||
#pgg
|
#pgg
|
||||||
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOverlap_above2genes.html"))
|
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap_above2genes.html"))
|
||||||
htmlwidgets::saveWidget(pgg, fname)
|
htmlwidgets::saveWidget(pgg, fname)
|
||||||
|
|
||||||
#4
|
#4
|
||||||
@@ -728,15 +688,15 @@ gg <- ggplot(data = X_overlap_nothresold, aes(
|
|||||||
SD_1 = Z_lm_K_SD_X1,
|
SD_1 = Z_lm_K_SD_X1,
|
||||||
SD_2 = Z_lm_K_SD_X2
|
SD_2 = Z_lm_K_SD_X2
|
||||||
)) +
|
)) +
|
||||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||||
geom_point(shape = 3) +
|
geom_point(shape = 3) +
|
||||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
|
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
|
||||||
theme_Publication_legend_right()
|
theme_Publication_legend_right()
|
||||||
|
|
||||||
pdf(
|
pdf(
|
||||||
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_Above2SD_ByOverlap.pdf")),
|
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_Above2SD_ByOverlap.pdf")),
|
||||||
width = 12,
|
width = 12,
|
||||||
height = 8
|
height = 8
|
||||||
)
|
)
|
||||||
@@ -746,7 +706,7 @@ dev.off()
|
|||||||
pgg <- ggplotly(gg)
|
pgg <- ggplotly(gg)
|
||||||
#pgg
|
#pgg
|
||||||
|
|
||||||
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_Above2SD_ByOverlap.html"))
|
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_Above2SD_ByOverlap.html"))
|
||||||
htmlwidgets::saveWidget(pgg, fname)
|
htmlwidgets::saveWidget(pgg, fname)
|
||||||
|
|
||||||
#5
|
#5
|
||||||
@@ -769,21 +729,21 @@ X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 &
|
|||||||
X$Overlap <- NA
|
X$Overlap <- NA
|
||||||
|
|
||||||
try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg, ]$Overlap <-
|
try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg, ]$Overlap <-
|
||||||
paste(Name1, "Specific_Deletion_Suppressors", sep = "_"))
|
paste(exp1_name, "Specific_Deletion_Suppressors", sep = "_"))
|
||||||
try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg, ]$Overlap <-
|
try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg, ]$Overlap <-
|
||||||
paste(Name1, "Specific_Deletion_Enhancers", sep = "_"))
|
paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"))
|
||||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg, ]$Overlap <-
|
try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg, ]$Overlap <-
|
||||||
paste(Name2, "Specific_Deletion_Suppressors", sep = "_"))
|
paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"))
|
||||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg, ]$Overlap <-
|
try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg, ]$Overlap <-
|
||||||
paste(Name2, "Specific_Deletion_Enhancers", sep = "_"))
|
paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"))
|
||||||
try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg, ]$Overlap <-
|
try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg, ]$Overlap <-
|
||||||
"Overlapping_Deletion_Suppressors")
|
"Overlapping_Deletion_Suppressors")
|
||||||
try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg, ]$Overlap <-
|
try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg, ]$Overlap <-
|
||||||
"Overlapping_Deletion_Enhancers")
|
"Overlapping_Deletion_Enhancers")
|
||||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg, ]$Overlap <-
|
try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg, ]$Overlap <-
|
||||||
paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_"))
|
paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_"))
|
||||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg, ]$Overlap <-
|
try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg, ]$Overlap <-
|
||||||
paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_"))
|
paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"))
|
||||||
|
|
||||||
X_abovethreshold <- X[!(is.na(X$Overlap)), ]
|
X_abovethreshold <- X[!(is.na(X$Overlap)), ]
|
||||||
gg <- ggplot(data = X_abovethreshold, aes(
|
gg <- ggplot(data = X_abovethreshold, aes(
|
||||||
@@ -797,15 +757,15 @@ gg <- ggplot(data = X_abovethreshold, aes(
|
|||||||
SD_1 = Z_lm_K_SD_X1,
|
SD_1 = Z_lm_K_SD_X1,
|
||||||
SD_2 = Z_lm_K_SD_X2
|
SD_2 = Z_lm_K_SD_X2
|
||||||
)) +
|
)) +
|
||||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||||
geom_point(shape = 3) +
|
geom_point(shape = 3) +
|
||||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
|
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
|
||||||
theme_Publication_legend_right()
|
theme_Publication_legend_right()
|
||||||
|
|
||||||
pdf(
|
pdf(
|
||||||
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.pdf")),
|
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold.pdf")),
|
||||||
width = 12,
|
width = 12,
|
||||||
height = 8
|
height = 8
|
||||||
)
|
)
|
||||||
@@ -814,7 +774,7 @@ gg
|
|||||||
dev.off()
|
dev.off()
|
||||||
pgg <- ggplotly(gg)
|
pgg <- ggplotly(gg)
|
||||||
#pgg
|
#pgg
|
||||||
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.html"))
|
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold.html"))
|
||||||
htmlwidgets::saveWidget(pgg, fname)
|
htmlwidgets::saveWidget(pgg, fname)
|
||||||
|
|
||||||
#6
|
#6
|
||||||
@@ -829,16 +789,16 @@ gg <- ggplot(data = X_abovethreshold, aes(
|
|||||||
SD_1 = Z_lm_K_SD_X1,
|
SD_1 = Z_lm_K_SD_X1,
|
||||||
SD_2 = Z_lm_K_SD_X2
|
SD_2 = Z_lm_K_SD_X2
|
||||||
)) +
|
)) +
|
||||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||||
geom_text(aes(label = Term_Avg), nudge_y = 0.25, size = 2) +
|
geom_text(aes(label = Term_Avg), nudge_y = 0.25, size = 2) +
|
||||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||||
geom_point(shape = 3, size = 3) +
|
geom_point(shape = 3, size = 3) +
|
||||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, " vs. ", Name2)) +
|
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, " vs. ", exp2_name)) +
|
||||||
theme_Publication_legend_right()
|
theme_Publication_legend_right()
|
||||||
|
|
||||||
pdf(
|
pdf(
|
||||||
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_names.pdf")),
|
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_names.pdf")),
|
||||||
width = 20,
|
width = 20,
|
||||||
height = 20
|
height = 20
|
||||||
)
|
)
|
||||||
@@ -846,7 +806,7 @@ gg
|
|||||||
dev.off()
|
dev.off()
|
||||||
pgg <- ggplotly(gg)
|
pgg <- ggplotly(gg)
|
||||||
#pgg
|
#pgg
|
||||||
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_names.html"))
|
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_names.html"))
|
||||||
htmlwidgets::saveWidget(pgg, fname)
|
htmlwidgets::saveWidget(pgg, fname)
|
||||||
|
|
||||||
#7
|
#7
|
||||||
@@ -866,16 +826,16 @@ gg <- ggplot(data = X_abovethreshold, aes(
|
|||||||
SD_1 = Z_lm_K_SD_X1,
|
SD_1 = Z_lm_K_SD_X1,
|
||||||
SD_2 = Z_lm_K_SD_X2
|
SD_2 = Z_lm_K_SD_X2
|
||||||
)) +
|
)) +
|
||||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||||
geom_text(aes(label = X1_Rank), nudge_y = 0.25, size = 4) +
|
geom_text(aes(label = X1_Rank), nudge_y = 0.25, size = 4) +
|
||||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||||
geom_point(shape = 3, size = 3) +
|
geom_point(shape = 3, size = 3) +
|
||||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
|
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
|
||||||
theme_Publication_legend_right()
|
theme_Publication_legend_right()
|
||||||
|
|
||||||
pdf(
|
pdf(
|
||||||
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX1.pdf")),
|
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX1.pdf")),
|
||||||
width = 15,
|
width = 15,
|
||||||
height = 15
|
height = 15
|
||||||
)
|
)
|
||||||
@@ -885,7 +845,7 @@ dev.off()
|
|||||||
pgg <- ggplotly(gg)
|
pgg <- ggplotly(gg)
|
||||||
#pgg
|
#pgg
|
||||||
fname <-
|
fname <-
|
||||||
file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX1.html"))
|
file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX1.html"))
|
||||||
htmlwidgets::saveWidget(pgg, fname)
|
htmlwidgets::saveWidget(pgg, fname)
|
||||||
|
|
||||||
#8
|
#8
|
||||||
@@ -900,16 +860,16 @@ gg <- ggplot(data = X_abovethreshold, aes(
|
|||||||
SD_1 = Z_lm_K_SD_X1,
|
SD_1 = Z_lm_K_SD_X1,
|
||||||
SD_2 = Z_lm_K_SD_X2
|
SD_2 = Z_lm_K_SD_X2
|
||||||
)) +
|
)) +
|
||||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||||
geom_text(aes(label = X2_Rank), nudge_y = 0.25, size = 4) +
|
geom_text(aes(label = X2_Rank), nudge_y = 0.25, size = 4) +
|
||||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||||
geom_point(shape = 3, size = 3) +
|
geom_point(shape = 3, size = 3) +
|
||||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
|
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
|
||||||
theme_Publication_legend_right()
|
theme_Publication_legend_right()
|
||||||
|
|
||||||
pdf(
|
pdf(
|
||||||
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX2.pdf")),
|
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX2.pdf")),
|
||||||
width = 15,
|
width = 15,
|
||||||
height = 15
|
height = 15
|
||||||
)
|
)
|
||||||
@@ -919,17 +879,17 @@ dev.off()
|
|||||||
pgg <- ggplotly(gg)
|
pgg <- ggplotly(gg)
|
||||||
#pgg
|
#pgg
|
||||||
fname <-
|
fname <-
|
||||||
file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX2.html"))
|
file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX2.html"))
|
||||||
htmlwidgets::saveWidget(pgg, fname)
|
htmlwidgets::saveWidget(pgg, fname)
|
||||||
|
|
||||||
write.csv(
|
write.csv(
|
||||||
x = X,
|
x = X,
|
||||||
file = file.path(pairDirK, paste0("All_GTF_Avg_Scores_", Name1, "_vs_", Name2, ".csv")),
|
file = file.path(pairDirK, paste0("All_GTF_Avg_Scores_", exp1_name, "_vs_", exp2_name, ".csv")),
|
||||||
row.names = FALSE
|
row.names = FALSE
|
||||||
)
|
)
|
||||||
|
|
||||||
write.csv(
|
write.csv(
|
||||||
x = X_abovethreshold,
|
x = X_abovethreshold,
|
||||||
file = file.path(pairDirK, paste0("AboveThreshold_GTF_Avg_Scores_", Name1, "_vs_", Name2, ".csv")),
|
file = file.path(pairDirK, paste0("AboveThreshold_GTF_Avg_Scores_", exp1_name, "_vs_", exp2_name, ".csv")),
|
||||||
row.names = FALSE
|
row.names = FALSE
|
||||||
)
|
)
|
||||||
|
|||||||
335
workflow/apps/r/calculate_interaction_zscores.R
Normal file
335
workflow/apps/r/calculate_interaction_zscores.R
Normal file
@@ -0,0 +1,335 @@
|
|||||||
|
suppressMessages({
|
||||||
|
if (!require("ggplot2")) stop("Package ggplot2 is required but not installed.")
|
||||||
|
if (!require("plotly")) stop("Package plotly is required but not installed.")
|
||||||
|
if (!require("htmlwidgets")) stop("Package htmlwidgets is required but not installed.")
|
||||||
|
if (!require("dplyr")) stop("Package dplyr is required but not installed.")
|
||||||
|
if (!require("ggthemes")) stop("Package ggthemes is required but not installed.")
|
||||||
|
if (!require("plyr")) stop("Package plyr is required but not installed.")
|
||||||
|
})
|
||||||
|
|
||||||
|
# Constants for configuration
|
||||||
|
PLOT_WIDTH <- 14
|
||||||
|
PLOT_HEIGHT <- 9
|
||||||
|
BASE_SIZE <- 14
|
||||||
|
|
||||||
|
options(warn = 2, max.print = 100)
|
||||||
|
|
||||||
|
# Parse the script arguments into a named list.
#
# Expected arguments (positional):
#   1: output directory
#   2: standard-deviation threshold (numeric)
#   3: SGD features file
#   4: easy results file
#   5+: one or more <experiment directory> <experiment name> pairs
#
# Returns a list with out_dir, sd, sgd_gene_list, easy_results_file and
# experiments (a character vector of directories named by experiment name).
# Stops with a usage message when the argument count is too small or the
# experiment directory/name arguments are not paired.
parse_arguments <- function() {
  args <- if (interactive()) {
    # Developer defaults used when sourcing the script from an R session
    c(
      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240116_jhartman2_DoxoHLD",
      3,
      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/apps/r/SGD_features.tab",
      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/easy/20240116_jhartman2_DoxoHLD/results_std.txt",
      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp1",
      "Experiment 1: Doxo versus HLD",
      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp2",
      "Experiment 2: HLD versus Doxo",
      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp3",
      "Experiment 3: HLD versus WT",
      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp4",
      "Experiment 4: Doxo versus WT"
    )
  } else {
    commandArgs(trailingOnly = TRUE)
  }

  # Four fixed arguments plus at least one complete <dir> <name> pair.
  # Without this check seq(5, length(args), by = 2) fails with an unrelated
  # "wrong sign in 'by'" error, or an unpaired directory gets an NA name.
  if (length(args) < 6 || (length(args) - 4) %% 2 != 0) {
    stop(
      "Expected arguments: <out_dir> <sd> <sgd_gene_list> <easy_results_file> ",
      "<exp_dir> <exp_name> [<exp_dir> <exp_name> ...]; got ", length(args)
    )
  }

  exp_paths <- normalizePath(file.path(args[seq(5, length(args), by = 2)]), mustWork = FALSE)
  exp_names <- args[seq(6, length(args), by = 2)]
  experiments <- setNames(exp_paths, exp_names)

  list(
    out_dir = normalizePath(file.path(args[1]), mustWork = FALSE),
    sd = as.numeric(args[2]),
    sgd_gene_list = normalizePath(file.path(args[3]), mustWork = FALSE),
    easy_results_file = normalizePath(file.path(args[4]), mustWork = FALSE),
    experiments = experiments
  )
}
|
||||||
|
|
||||||
|
args <- parse_arguments()
|
||||||
|
|
||||||
|
dir.create(args$out_dir, showWarnings = FALSE)
|
||||||
|
|
||||||
|
# Define themes and scales
|
||||||
|
# Publication-style ggplot2 theme built on theme_foundation(): bold centred
# title, bold axis titles, black axis lines, light major grid, no minor grid,
# and a horizontal legend at the bottom.
theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") {
  # Rectangle without an outline, reused for every borderless element
  no_outline <- element_rect(colour = NA)
  light_grey <- "#f0f0f0"

  overrides <- theme(
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    panel.background = no_outline,
    plot.background = no_outline,
    panel.border = no_outline,
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.line = element_line(colour = "black"),
    panel.grid.major = element_line(colour = light_grey),
    panel.grid.minor = element_blank(),
    legend.key = no_outline,
    legend.position = "bottom",
    legend.direction = "horizontal",
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    strip.background = element_rect(colour = light_grey, fill = light_grey),
    strip.text = element_text(face = "bold")
  )

  theme_foundation(base_size = base_size, base_family = base_family) + overrides
}
|
||||||
|
|
||||||
|
# Variant of theme_publication() that places a vertical legend on the right
# with 0.5 cm keys, no legend spacing and an italic legend title.
theme_publication_legend_right <- function(base_size = BASE_SIZE, base_family = "sans") {
  legend_overrides <- theme(
    legend.position = "right",
    legend.direction = "vertical",
    legend.key.size = unit(0.5, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = element_text(face = "italic")
  )
  base_theme <- theme_publication(base_size, base_family)
  base_theme + legend_overrides
}
|
||||||
|
|
||||||
|
# Nine-colour palette shared by the fill and colour publication scales.
.publication_colours <- c(
  "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
  "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
)

# Discrete fill scale using the publication palette.
# Extra arguments are forwarded to ggplot2::discrete_scale().
# manual_pal() lives in the scales package, which this script never attaches,
# so it is called with an explicit namespace.
# NOTE(review): newer ggplot2 releases may flag the positional scale name
# ("Publication") as deprecated; with options(warn = 2) that would become an
# error - confirm against the installed ggplot2 version.
scale_fill_publication <- function(...) {
  discrete_scale("fill", "Publication", scales::manual_pal(values = .publication_colours), ...)
}

# Discrete colour scale using the publication palette.
# Extra arguments are forwarded to ggplot2::discrete_scale().
scale_colour_publication <- function(...) {
  discrete_scale("colour", "Publication", scales::manual_pal(values = .publication_colours), ...)
}
|
||||||
|
|
||||||
|
# Load SGD gene list
|
||||||
|
# Read the SGD features table and return a two-column data frame (ORF, GeneName).
# The file is read without a header as 16 columns; only columns 4 and 5 are
# kept (as character) and renamed.
sgd_genes <- function(sgd_gene_list) {
  # "NULL" column classes make read.delim skip those columns entirely
  column_classes <- c(rep("NULL", 3), rep("character", 2), rep("NULL", 11))
  gene_table <- read.delim(
    file = sgd_gene_list,
    quote = "",
    header = FALSE,
    colClasses = column_classes
  )
  dplyr::rename(gene_table, ORF = V4, GeneName = V5)
}
|
||||||
|
|
||||||
|
genes <- sgd_genes(args$sgd_gene_list)
|
||||||
|
|
||||||
|
# Load the easy results file and derive the columns used downstream.
#
# Arguments:
#   easy_results_file: path to a tab-delimited results file; the first two
#     lines are skipped and the first column is used as row names.
#   genes: data frame with character columns ORF and GeneName.
#
# Returns the data frame with rows whose first column is "" or "Scan" removed,
# known measurement columns converted to numeric, and the columns L, AUC,
# conc_num, delta_bg and GeneName added. GeneName falls back to the ORF when
# the ORF has no (or an empty) gene name in `genes`.
#
# Implemented with base R so the result does not depend on whether plyr or
# dplyr currently owns `mutate` on the search path.
load_and_preprocess_data <- function(easy_results_file, genes) {
  df <- tryCatch(
    read.delim(easy_results_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE),
    error = function(e) {
      stop("Error reading file: ", easy_results_file, "\n", e$message)
    }
  )

  # Drop rows whose first column is empty or literally "Scan"
  df <- df[!df[[1]] %in% c("", "Scan"), , drop = FALSE]

  numeric_columns <- c("Col", "Row", "l", "K", "r", "Scan", "AUC96", "LstBackgrd", "X1stBackgrd")
  present <- numeric_columns[numeric_columns %in% colnames(df)]
  df[present] <- lapply(df[present], as.numeric)

  n_rows <- nrow(df)

  # Copy a source column, or warn and fill with NA when it is absent
  copy_or_na <- function(column) {
    if (column %in% colnames(df)) {
      df[[column]]
    } else {
      warning("Missing column: ", column)
      rep(NA, n_rows)
    }
  }

  df$L <- copy_or_na("l")
  df$AUC <- copy_or_na("AUC96")

  # Keep only digits and dots from the concentration label
  df$conc_num <- if ("Conc" %in% colnames(df)) {
    as.numeric(gsub("[^0-9\\.]", "", df$Conc))
  } else {
    rep(NA, n_rows)
  }

  df$delta_bg <- if (all(c("X1stBackgrd", "LstBackgrd") %in% colnames(df))) {
    df$LstBackgrd - df$X1stBackgrd
  } else {
    warning("Missing columns for delta_bg calculation")
    rep(NA, n_rows)
  }

  if (!"ORF" %in% colnames(df)) stop("Missing column: ORF")

  # Vectorised lookup (first match wins). The previous per-row lookup tested
  # gene_name == "" before checking its length, which fails when an ORF has
  # no match or several matches in `genes`.
  gene_name <- as.character(genes$GeneName[match(df$ORF, genes$ORF)])
  unnamed <- is.na(gene_name) | gene_name == ""
  gene_name[unnamed] <- as.character(df$ORF)[unnamed]
  df$GeneName <- gene_name

  if (nrow(df) == 0) warning("Dataframe is empty after filtering")

  return(df)
}
|
||||||
|
|
||||||
|
# Plot one variable against Scan, coloured by concentration, and publish it
# as a PDF and a self-contained HTML (plotly) file.
#
# Arguments:
#   df: data frame with Scan, conc_num and the column named by `var`.
#   var: name of the column to plot; non-finite values are dropped.
#   plot_type: "scatter" uses geom_point, anything else geom_boxplot.
#   out_dir_qc: directory receiving plate_analysis_<var><suffix>.pdf/.html.
#   suffix: optional file-name suffix.
create_and_publish_plot <- function(df, var, plot_type, out_dir_qc, suffix = "") {
  plot_func <- if (plot_type == "scatter") geom_point else geom_boxplot
  filtered_df <- dplyr::filter(df, is.finite(.data[[var]]))

  p <- ggplot(filtered_df, aes(Scan, .data[[var]], color = as.factor(conc_num))) +
    plot_func(shape = 3, size = 0.2, position = "jitter") +
    stat_summary(fun = mean, geom = "point", size = 0.6) +
    stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), geom = "errorbar") +
    ggtitle(paste("Plate analysis by Drug Conc for", var, "before quality control")) +
    theme_publication()

  pdf_path <- file.path(out_dir_qc, paste0("plate_analysis_", var, suffix, ".pdf"))
  pdf(pdf_path, width = PLOT_WIDTH, height = PLOT_HEIGHT)
  # Always close the device, even when rendering fails, so a failed plot does
  # not leave a dangling graphics device behind
  tryCatch(print(p), finally = dev.off())

  # Escape the dot so only a literal ".pdf" extension is replaced
  html_path <- sub("\\.pdf$", ".html", pdf_path)
  pgg <- suppressWarnings(ggplotly(p, tooltip = c("L", "K", "ORF", "Gene", "delta_bg", "GeneName")) %>%
    layout(legend = list(orientation = "h")))
  saveWidget(pgg, html_path, selfcontained = TRUE)
}
|
||||||
|
|
||||||
|
|
||||||
|
# Compute per-(OrfRep, conc_num) summary statistics for each variable and
# write them to summary_stats_all_strains.csv in out_dir.
#
# Arguments:
#   df: data frame with OrfRep, conc_num and every column named in `variables`.
#   variables: character vector of column names to summarise.
#   out_dir: directory receiving the CSV.
#
# The CSV has one block of rows per variable, identified by the `variable`
# column, with N, mean_val, sd_val and se_val.
publish_summary_stats <- function(df, variables, out_dir) {
  stats_list <- lapply(variables, function(var) {
    df %>%
      dplyr::group_by(OrfRep, conc_num) %>%
      dplyr::summarize(
        N = dplyr::n(), # Ensure that the correct version of n() is used
        mean_val = mean(.data[[var]], na.rm = TRUE),
        sd_val = sd(.data[[var]], na.rm = TRUE),
        # NOTE(review): N counts all rows, including those where the value is
        # NA, while sd_val ignores NAs - confirm this is the intended SE
        se_val = sd_val / sqrt(N)
      )
  })
  # Name the list so bind_rows(.id = ) records the variable name rather than
  # the list position (1, 2, ...)
  names(stats_list) <- variables
  summary_stats_df <- dplyr::bind_rows(stats_list, .id = "variable")
  write.csv(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
}
|
||||||
|
|
||||||
|
|
||||||
|
# Compute per-OrfRep means of L, K, r, AUC and delta_bg (plus the delta_bg SD),
# rank the means, and write two CSV files to out_dir:
#   rf_zscores_interaction.csv         - one row per OrfRep
#   rf_zscores_interaction_ranked.csv  - same rows ordered by l_rank, k_rank
#
# All verbs are qualified with dplyr:: because this script attaches plyr
# after dplyr; the unqualified summarize/mutate/arrange resolve to the plyr
# versions, and plyr::summarize ignores group_by() and collapses the whole
# table into a single row.
publish_interaction_scores <- function(df, out_dir) {
  interaction_scores <- df %>%
    dplyr::group_by(OrfRep) %>%
    dplyr::summarize(
      mean_L = mean(L, na.rm = TRUE),
      mean_K = mean(K, na.rm = TRUE),
      mean_r = mean(r, na.rm = TRUE),
      mean_AUC = mean(AUC, na.rm = TRUE),
      delta_bg_mean = mean(delta_bg, na.rm = TRUE),
      delta_bg_sd = sd(delta_bg, na.rm = TRUE)
    ) %>%
    dplyr::mutate(
      l_rank = rank(mean_L),
      k_rank = rank(mean_K),
      r_rank = rank(mean_r),
      auc_rank = rank(mean_AUC)
    )

  write.csv(interaction_scores, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE)
  write.csv(dplyr::arrange(interaction_scores, l_rank, k_rank),
    file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE)
}
|
||||||
|
|
||||||
|
# Append centred and scaled copies of L, K, r and AUC (zscore_L, zscore_K,
# zscore_r, zscore_AUC) to df and write the result to
# zscores_interaction.csv in out_dir.
#
# scale() returns a one-column matrix carrying centre/scale attributes; it is
# flattened to a plain numeric vector so the data frame holds ordinary
# columns. Base assignment is used so the result does not depend on whether
# plyr or dplyr currently owns `mutate`.
publish_zscores <- function(df, out_dir) {
  zscore <- function(x) as.numeric(scale(x, center = TRUE, scale = TRUE))

  zscores <- df
  for (measure in c("L", "K", "r", "AUC")) {
    if (!measure %in% colnames(zscores)) stop("Missing column: ", measure)
    zscores[[paste0("zscore_", measure)]] <- zscore(zscores[[measure]])
  }

  write.csv(zscores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE)
}
|
||||||
|
|
||||||
|
# Publish the QC scatter plots for L, K, r, AUC and delta_bg twice: once for
# the full data set and once for the rows whose delta_bg is at or above
# delta_bg_tolerance (files carry the "_above_tolerance" suffix).
generate_and_publish_qc <- function(df, delta_bg_tolerance, out_dir_qc) {
  qc_variables <- c("L", "K", "r", "AUC", "delta_bg")

  # One scatter plot per QC variable for the given data set
  plot_each <- function(data, suffix = "") {
    lapply(qc_variables, create_and_publish_plot,
      df = data, plot_type = "scatter", out_dir_qc = out_dir_qc, suffix = suffix)
  }

  plot_each(df)
  plot_each(filter(df, delta_bg >= delta_bg_tolerance), suffix = "_above_tolerance")
}
|
||||||
|
|
||||||
|
# Run the full per-experiment pipeline: load the easy results file, publish
# QC plots into <exp_dir>/qc, and publish summary statistics, interaction
# scores and z-scores into <exp_dir>/zscores.
#
# Returns a list with zscores_file (path of the z-scores CSV) and exp_name.
process_exp_dir <- function(exp_dir, exp_name, genes, easy_results_file) {
  out_dir <- file.path(exp_dir, "zscores")
  out_dir_qc <- file.path(exp_dir, "qc")
  # recursive = TRUE so a missing experiment directory is created too instead
  # of surfacing later as a failed file write
  dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
  dir.create(out_dir_qc, showWarnings = FALSE, recursive = TRUE)

  df <- load_and_preprocess_data(easy_results_file, genes)
  # Tolerance is the mean background shift plus three standard deviations
  delta_bg_tolerance <- mean(df$delta_bg, na.rm = TRUE) + 3 * sd(df$delta_bg, na.rm = TRUE)

  generate_and_publish_qc(df, delta_bg_tolerance, out_dir_qc)

  variables <- c("L", "K", "r", "AUC", "delta_bg")
  publish_summary_stats(df, variables, out_dir)
  publish_interaction_scores(df, out_dir)
  publish_zscores(df, out_dir)

  list(
    zscores_file = file.path(out_dir, "zscores_interaction.csv"),
    exp_name = exp_name
  )
}
|
||||||
|
|
||||||
|
processed_experiments <- lapply(names(args$experiments), function(exp_name) {
|
||||||
|
process_exp_dir(args$experiments[[exp_name]], exp_name, genes, args$easy_results_file)
|
||||||
|
})
|
||||||
|
|
||||||
|
zscores_files <- sapply(processed_experiments, `[[`, "zscores_file")
|
||||||
|
exp_names <- sapply(processed_experiments, `[[`, "exp_name")
|
||||||
|
|
||||||
|
# Read every z-scores CSV and full-join them on OrfRep, left to right.
# Stops when fewer than two files are supplied.
combine_zscores <- function(zscores_files) {
  if (length(zscores_files) < 2) stop("Not enough experiments to compare, exiting script")

  tables <- lapply(zscores_files, function(path) {
    read.csv(file = path, stringsAsFactors = FALSE)
  })

  # Fold the tables together pairwise, keeping rows present in either side
  Reduce(
    function(joined, incoming) plyr::join(joined, incoming, by = "OrfRep", type = "full"),
    tables
  )
}
|
||||||
|
|
||||||
|
# Select, clean, filter and relabel the joined z-score table, then write
# REMcRdy_lm_only.csv and Shift_only.csv into out_dir.
#
# Arguments:
#   joined_data: table produced by joining the per-experiment z-score files.
#   sd: threshold used by filter_by_sd(); 0 disables filtering.
#   out_dir: directory receiving the two CSV files.
#   exp_names: experiment names used to relabel numeric column suffixes.
process_combined_zscores <- function(joined_data, sd, out_dir, exp_names) {
  # Keep the OrfRep/Gene columns and the lm/shift z-score columns for K and L.
  # NOTE(review): contains() is a selection helper; its use inside arrange()
  # looks unintended (probably meant to sort by the OrfRep column) - verify
  # that this call does not error.
  ordered_data <- joined_data %>%
    select(contains("OrfRep"), matches("Gene"),
      contains("z_lm_k"), contains("z_shift_k"),
      contains("z_lm_l"), contains("z_shift_l")) %>%
    arrange(contains("OrfRep"))

  # Drop the columns named Gene.1 ... Gene.<suffixes>
  clean_headers <- function(data, suffixes) {
    suffixes_to_remove <- paste0("Gene.", seq_len(suffixes))
    select(data, -all_of(suffixes_to_remove))
  }

  # NOTE(review): zscores_files is not a parameter of this function; it is
  # resolved from the enclosing (script-level) environment.
  headSel <- clean_headers(ordered_data, length(zscores_files) - 1)
  headSel2 <- clean_headers(select(joined_data, contains("OrfRep"), matches("Gene")), length(zscores_files) - 1)

  # Replace NA with a small constant: 0.001 in columns whose name contains
  # "Shift", 0.0001 in columns whose name contains "Z_lm_".
  # NOTE(review): these patterns are capitalised while the selections above
  # use lower-case "z_lm_"/"z_shift" - confirm the actual column-name casing.
  fill_na_in_columns <- function(data) {
    for (header in colnames(data)) {
      if (grepl("Shift", header)) {
        data[[header]][is.na(data[[header]])] <- 0.001
      } else if (grepl("Z_lm_", header)) {
        data[[header]][is.na(data[[header]])] <- 0.0001
      }
    }
    data
  }

  headSel <- fill_na_in_columns(headSel)

  # Keep rows where at least one z_lm_ column has absolute value >= sd
  filter_by_sd <- function(data, sd) {
    if (sd == 0) return(data)

    z_lm_cols <- select(data, contains("z_lm_"))
    filter_vector <- rowSums(abs(z_lm_cols) >= sd) > 0
    data[filter_vector, ]
  }

  REMcRdy <- filter_by_sd(select(headSel, contains("OrfRep"), matches("Gene"), contains("z_lm_")), sd)
  # NOTE(review): this input holds only z_shift columns, yet filter_by_sd()
  # filters on z_lm_ columns - verify the filter behaves as intended here.
  shiftOnly <- filter_by_sd(select(headSel, contains("OrfRep"), matches("Gene"), contains("z_shift")), sd)

  # Append, for each column index from 3 on, the data2 column followed by the
  # data1 column.
  # NOTE(review): 3:ncol(data1) counts downwards when data1 has fewer than
  # three columns.
  reorder_columns <- function(data1, data2) {
    combined_data <- data1
    for (i in 3:ncol(data1)) {
      combined_data <- cbind(combined_data, data2[i], data1[i])
    }
    combined_data
  }

  # combI is assigned here but not used again in this function
  combI <- reorder_columns(headSel2, shiftOnly)

  # First write, with the original column names; the same two paths are
  # written again below after relabelling
  write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
  write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)

  # Replace a trailing ".<n>" column suffix with "_<name of experiment n>"
  # NOTE(review): the "." in paste0(".", suffix) is used as a regular
  # expression by gsub and so matches any character - confirm this is safe
  # for the actual header names.
  relabel_headers <- function(headers, exp_names) {
    new_labels <- headers
    for (i in seq_along(headers)) {
      suffix <- sub("^.*\\.(\\d+)$", "\\1", headers[i])
      if (suffix %in% seq_along(exp_names)) {
        exp_name <- exp_names[as.numeric(suffix)]
        new_labels[i] <- gsub(paste0(".", suffix), paste0("_", exp_name), headers[i])
      }
    }
    new_labels
  }

  colnames(shiftOnly) <- relabel_headers(colnames(shiftOnly), exp_names)
  colnames(REMcRdy) <- relabel_headers(colnames(REMcRdy), exp_names)

  # Second write, overwriting the files above with the relabelled headers
  write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
  write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
}
|
||||||
|
|
||||||
|
joined_data <- combine_zscores(zscores_files)
|
||||||
|
process_combined_zscores(joined_data, args$sd, args$out_dir, exp_names)
|
||||||
209
workflow/apps/r/calculate_pairwise_lk.R
Normal file
209
workflow/apps/r/calculate_pairwise_lk.R
Normal file
@@ -0,0 +1,209 @@
|
|||||||
|
suppressMessages({
|
||||||
|
library("ggplot2")
|
||||||
|
library("plotly")
|
||||||
|
library("htmlwidgets")
|
||||||
|
library("extrafont")
|
||||||
|
library("grid")
|
||||||
|
library("ggthemes")
|
||||||
|
})
|
||||||
|
|
||||||
|
# Constants for configuration
|
||||||
|
PLOT_WIDTH <- 12
|
||||||
|
PLOT_HEIGHT <- 8
|
||||||
|
BASE_SIZE <- 14
|
||||||
|
|
||||||
|
options(warn = 2, max.print = 100)
|
||||||
|
|
||||||
|
# Parse arguments
|
||||||
|
# Parse the script arguments into a named list.
#
# Expected arguments (positional):
#   1: name of experiment 1      2: CSV file of experiment 1
#   3: name of experiment 2      4: CSV file of experiment 2
#   5: output directory
#
# Both CSV files must exist (normalizePath with mustWork = TRUE). Stops with
# a usage message when fewer than five arguments are supplied.
parse_arguments <- function() {
  if (interactive()) {
    # Placeholder values for interactive sessions
    args <- c(
      "Exp1",
      "/path/to/exp1_file.csv",
      "Exp2",
      "/path/to/exp2_file.csv",
      "/path/to/output_dir"
    )
  } else {
    args <- commandArgs(trailingOnly = TRUE)
  }

  # Fail early with a readable message instead of passing NA to normalizePath
  if (length(args) < 5) {
    stop(
      "Expected arguments: <exp1_name> <exp1_file> <exp2_name> <exp2_file> <output_dir>; got ",
      length(args)
    )
  }

  list(
    exp1_name = args[1],
    exp1_file = normalizePath(args[2], mustWork = TRUE),
    exp2_name = args[3],
    exp2_file = normalizePath(args[4], mustWork = TRUE),
    output_dir = normalizePath(args[5], mustWork = FALSE)
  )
}
|
||||||
|
|
||||||
|
args <- parse_arguments()
|
||||||
|
|
||||||
|
# Create output directories if they don't exist
|
||||||
|
pairDirL <- file.path(args$output_dir, paste0("PairwiseCompareL_", args$exp1_name, "-", args$exp2_name))
|
||||||
|
pairDirK <- file.path(args$output_dir, paste0("PairwiseCompareK_", args$exp1_name, "-", args$exp2_name))
|
||||||
|
dir.create(pairDirL, showWarnings = FALSE, recursive = TRUE)
|
||||||
|
dir.create(pairDirK, showWarnings = FALSE, recursive = TRUE)
|
||||||
|
|
||||||
|
# Define themes and scales
|
||||||
|
# Publication-style ggplot2 theme built on theme_foundation(): bold centred
# title, bold axis titles, black axis lines, light major grid, no minor grid,
# and a horizontal legend with small keys at the bottom.
theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") {
  # Rectangle without an outline, reused for every borderless element
  no_outline <- element_rect(colour = NA)
  light_grey <- "#f0f0f0"

  overrides <- theme(
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    panel.background = no_outline,
    plot.background = no_outline,
    panel.border = no_outline,
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.line = element_line(colour = "black"),
    panel.grid.major = element_line(colour = light_grey),
    panel.grid.minor = element_blank(),
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.key.size = unit(0.2, "cm"),
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    strip.background = element_rect(colour = light_grey, fill = light_grey),
    strip.text = element_text(face = "bold")
  )

  theme_foundation(base_size = base_size, base_family = base_family) + overrides
}
|
||||||
|
|
||||||
|
# Variant of theme_publication() that places a vertical legend on the right
# with 0.5 cm keys.
theme_publication_legend_right <- function(base_size = BASE_SIZE, base_family = "sans") {
  legend_overrides <- theme(
    legend.position = "right",
    legend.direction = "vertical",
    legend.key.size = unit(0.5, "cm")
  )
  base_theme <- theme_publication(base_size, base_family)
  base_theme + legend_overrides
}
|
||||||
|
|
||||||
|
# Nine-colour palette shared by the fill and colour publication scales.
.publication_colours <- c(
  "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
  "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
)

# Discrete fill scale using the publication palette.
# Extra arguments are forwarded to ggplot2::discrete_scale().
# manual_pal() lives in the scales package, which this script never attaches,
# so it is called with an explicit namespace.
# NOTE(review): newer ggplot2 releases may flag the positional scale name
# ("Publication") as deprecated; with options(warn = 2) that would become an
# error - confirm against the installed ggplot2 version.
scale_fill_publication <- function(...) {
  discrete_scale("fill", "Publication", scales::manual_pal(values = .publication_colours), ...)
}

# Discrete colour scale using the publication palette.
# Extra arguments are forwarded to ggplot2::discrete_scale().
scale_colour_publication <- function(...) {
  discrete_scale("colour", "Publication", scales::manual_pal(values = .publication_colours), ...)
}
|
||||||
|
|
||||||
|
# Read and merge data
|
||||||
|
# Read two CSV files and outer-join them on the Term_Avg column.
# Columns that occur in both inputs get the suffixes "_df1" and "_df2".
load_and_merge_data <- function(file1, file2) {
  read_scores <- function(path) {
    read.csv(file = path, stringsAsFactors = FALSE, header = TRUE)
  }
  first <- read_scores(file1)
  second <- read_scores(file2)
  # all = TRUE keeps terms that appear in only one of the two files
  merge(first, second, by = "Term_Avg", all = TRUE, suffixes = c("_df1", "_df2"))
}
|
||||||
|
|
||||||
|
# Generate a plot and save to PDF and HTML
|
||||||
|
# Draw a scatter plot of two score columns and save it as PDF and HTML.
#
# Arguments:
#   data: data frame holding the columns named by x_var, y_var and color_var.
#   x_var, y_var, color_var: column names (strings) mapped to x, y and colour.
#   title: plot title.
#   file_path: output path WITHOUT extension; ".pdf" and ".html" are appended.
#   theme_function: zero-argument function returning the ggplot2 theme to add.
#
# Axis labels use args$exp1_name and args$exp2_name from the script-level
# `args` list.
generate_plot <- function(data, x_var, y_var, color_var, title, file_path, theme_function) {
  # NOTE(review): aes_string() and the `size` argument of geom_rect() may be
  # reported as deprecated by recent ggplot2 versions; with options(warn = 2)
  # such a warning would abort the script - confirm against the installed
  # version.
  gg <- ggplot(data = data, aes_string(
    x = x_var,
    y = y_var,
    color = color_var
  )) +
    xlab(paste("GO Term Avg lm Z for", args$exp1_name)) +
    geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
    geom_point(shape = 3) +
    ylab(paste("GO Term Avg lm Z for", args$exp2_name)) +
    ggtitle(title) +
    theme_function()

  # paste0 appends the extension to the stem; file.path(file_path, ".pdf")
  # would instead point at a file named ".pdf" inside a directory file_path
  pdf(paste0(file_path, ".pdf"), width = PLOT_WIDTH, height = PLOT_HEIGHT)
  # Always close the device, even when rendering fails
  tryCatch(print(gg), finally = dev.off())

  pgg <- ggplotly(gg)
  htmlwidgets::saveWidget(pgg, paste0(file_path, ".html"))
}
|
||||||
|
|
||||||
|
# Identify and annotate specific interactions
|
||||||
|
# Label each row of df by how its two average z-scores relate to the +/-2
# thresholds and store the label in a new Overlap_Avg column.
#
# Arguments:
#   df: data frame with columns Z_lm_<suffix>_Avg_df1 and Z_lm_<suffix>_Avg_df2.
#   exp1_name, exp2_name: experiment names used to build the labels.
#   suffix: score family inserted into the column names (e.g. "L" or "K").
#
# Returns df with Overlap_Avg added; rows meeting no rule (including rows with
# a missing score that prevents a decision) stay NA. Rules are applied in
# order, so a later rule overrides an earlier one for the same row.
# Stops when either score column is missing, instead of silently returning an
# unannotated table.
annotate_interactions <- function(df, exp1_name, exp2_name, suffix) {
  col1 <- paste0("Z_lm_", suffix, "_Avg_df1")
  col2 <- paste0("Z_lm_", suffix, "_Avg_df2")
  missing_cols <- setdiff(c(col1, col2), colnames(df))
  if (length(missing_cols) > 0) {
    stop("Missing column(s): ", paste(missing_cols, collapse = ", "))
  }

  z1 <- df[[col1]]
  z2 <- df[[col2]]
  df$Overlap_Avg <- NA

  # Each rule is list(<row mask>, <label>); order matters (last match wins)
  rules <- list(
    list(z1 >= 2 & z2 < 2, paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_")),
    list(z1 <= -2 & z2 > -2, paste(exp1_name, "Specific_Deletion_Suppressors", sep = "_")),
    list(z2 >= 2 & z1 < 2, paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_")),
    list(z2 <= -2 & z1 > -2, paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_")),
    list(z1 >= 2 & z2 >= 2, "Overlapping_Deletion_Enhancers"),
    list(z1 <= -2 & z2 <= -2, "Overlapping_Deletion_Suppressors"),
    list(z2 >= 2 & z1 <= -2, paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_")),
    list(z2 <= -2 & z1 >= 2, paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_"))
  )

  for (rule in rules) {
    # which() drops NA comparisons, so rows with missing scores are skipped
    df$Overlap_Avg[which(rule[[1]])] <- rule[[2]]
  }
  df
}
|
||||||
|
|
||||||
|
# Rank and filter data
|
||||||
|
# Build three derived tables from the annotated score table:
#   z1: df plus L_Subtract_SD_<suffix>_df1/_df2 holding average minus SD
#   z2: df plus the same two column names holding average plus SD
#   df_above_threshold: rows with a non-NA Overlap_Avg, with df1_Rank and
#     df2_Rank ranking the averages in descending order (ties broken randomly)
# Returns list(z1, z2, df_above_threshold).
rank_and_filter_data <- function(df, suffix) {
  avg1 <- paste0("Z_lm_", suffix, "_Avg_df1")
  avg2 <- paste0("Z_lm_", suffix, "_Avg_df2")
  sd1 <- paste0("Z_lm_", suffix, "_SD_df1")
  sd2 <- paste0("Z_lm_", suffix, "_SD_df2")
  out1 <- paste0("L_Subtract_SD_", suffix, "_df1")
  out2 <- paste0("L_Subtract_SD_", suffix, "_df2")

  # Average minus one SD
  minus_sd <- df
  minus_sd[[out1]] <- minus_sd[[avg1]] - minus_sd[[sd1]]
  minus_sd[[out2]] <- minus_sd[[avg2]] - minus_sd[[sd2]]

  # Average plus one SD (stored under the same column names)
  plus_sd <- df
  plus_sd[[out1]] <- plus_sd[[avg1]] + plus_sd[[sd1]]
  plus_sd[[out2]] <- plus_sd[[avg2]] + plus_sd[[sd2]]

  # Only annotated rows are ranked; negating makes the largest average rank 1
  annotated <- df[!is.na(df$Overlap_Avg), ]
  annotated$df1_Rank <- rank(-annotated[[avg1]], ties.method = "random")
  annotated$df2_Rank <- rank(-annotated[[avg2]], ties.method = "random")

  list(z1 = minus_sd, z2 = plus_sd, df_above_threshold = annotated)
}
|
||||||
|
|
||||||
|
# Main execution function for a pairwise comparison
|
||||||
|
# Run one pairwise comparison (for the score family named by `suffix`) and
# write its plots and CSV files into `dir`.
#
# Steps: merge the two experiment files, plot by ontology, annotate the
# interactions, plot by overlap (all rows and annotated rows only), then save
# the full and the annotated tables as CSV. Experiment names and input files
# come from the script-level `args` list.
run_pairwise_comparison <- function(suffix, dir) {
  x_col <- paste0("Z_lm_", suffix, "_Avg_df1")
  y_col <- paste0("Z_lm_", suffix, "_Avg_df2")

  # Output path (without extension) for a scatter plot of the given kind
  scatter_stem <- function(kind) {
    file.path(dir, paste0("Scatter_lm_GTA_Averages_", args$exp1_name, "_vs_", args$exp2_name, kind))
  }

  df <- load_and_merge_data(args$exp1_file, args$exp2_file)

  # Generate initial ontology-based plot
  generate_plot(
    df, x_col, y_col, "Ontology_Avg_df1",
    paste("Comparing Average GO Term Z lm for", args$exp1_name, "vs.", args$exp2_name),
    scatter_stem("_All_ByOntology"),
    theme_publication_legend_right
  )

  # Annotate interactions and generate overlap-based plots
  df <- annotate_interactions(df, args$exp1_name, args$exp2_name, suffix)
  ranks <- rank_and_filter_data(df, suffix)
  above_threshold <- ranks$df_above_threshold

  generate_plot(
    df, x_col, y_col, "Overlap_Avg",
    paste("Comparing Average GO Term Z lm for", args$exp1_name, "vs.", args$exp2_name),
    scatter_stem("_All_ByOverlap"),
    theme_publication_legend_right
  )

  generate_plot(
    above_threshold, x_col, y_col, "Overlap_Avg",
    paste("Comparing Average GO Term Z lm for", args$exp1_name, "vs.", args$exp2_name, "Above Threshold"),
    scatter_stem("_All_ByOverlap_AboveThreshold"),
    theme_publication_legend_right
  )

  # Save CSV files
  all_scores_csv <- file.path(
    dir, paste0("All_GTA_Avg_Scores_", args$exp1_name, "_vs_", args$exp2_name, ".csv")
  )
  above_threshold_csv <- file.path(
    dir, paste0("AboveThreshold_GTA_Avg_Scores_", args$exp1_name, "_vs_", args$exp2_name, ".csv")
  )
  write.csv(df, all_scores_csv, row.names = FALSE)
  write.csv(above_threshold, above_threshold_csv, row.names = FALSE)
}
|
||||||
|
|
||||||
|
# Execute Pairwise L and Pairwise K comparisons
|
||||||
|
run_pairwise_comparison("L", pairDirL)
|
||||||
|
run_pairwise_comparison("K", pairDirK)
|
||||||
@@ -14,7 +14,7 @@ BASE_SIZE <- 14
|
|||||||
|
|
||||||
options(warn = 2, max.print = 100)
|
options(warn = 2, max.print = 100)
|
||||||
|
|
||||||
# Function to parse arguments
|
# Parse arguments
|
||||||
parse_arguments <- function() {
|
parse_arguments <- function() {
|
||||||
if (interactive()) {
|
if (interactive()) {
|
||||||
args <- c(
|
args <- c(
|
||||||
@@ -92,7 +92,7 @@ scale_colour_publication <- function(...) {
|
|||||||
)), ...)
|
)), ...)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Function to load and preprocess data
|
# Load and preprocess data
|
||||||
load_and_preprocess_data <- function(input_file) {
|
load_and_preprocess_data <- function(input_file) {
|
||||||
df <- tryCatch({
|
df <- tryCatch({
|
||||||
read.delim(input_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
|
read.delim(input_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
|
||||||
@@ -124,7 +124,7 @@ df <- df %>%
|
|||||||
mutate(OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep)) %>%
|
mutate(OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep)) %>%
|
||||||
filter(!Gene %in% c("BLANK", "Blank", "blank"), Drug != "BMH21")
|
filter(!Gene %in% c("BLANK", "Blank", "blank"), Drug != "BMH21")
|
||||||
|
|
||||||
# Function to create plot
|
# Create plot
|
||||||
create_plot <- function(df, var, plot_type) {
|
create_plot <- function(df, var, plot_type) {
|
||||||
filtered_df <- df %>% filter(is.finite(.data[[var]]))
|
filtered_df <- df %>% filter(is.finite(.data[[var]]))
|
||||||
p <- ggplot(filtered_df, aes(Scan, .data[[var]], color = as.factor(conc_num))) +
|
p <- ggplot(filtered_df, aes(Scan, .data[[var]], color = as.factor(conc_num))) +
|
||||||
@@ -143,7 +143,7 @@ create_plot <- function(df, var, plot_type) {
|
|||||||
return(p)
|
return(p)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Function to publish plot to PDF and HTML (Plotly)
|
# Publish plot to PDF and HTML (Plotly)
|
||||||
publish_plot <- function(plot, plot_path) {
|
publish_plot <- function(plot, plot_path) {
|
||||||
# if (file.exists(plot_path)) {
|
# if (file.exists(plot_path)) {
|
||||||
# file.rename(plot_path, paste0(plot_path, BACKUP_SUFFIX))
|
# file.rename(plot_path, paste0(plot_path, BACKUP_SUFFIX))
|
||||||
@@ -159,7 +159,7 @@ publish_plot <- function(plot, plot_path) {
|
|||||||
saveWidget(pgg, sub(".pdf$", ".html", plot_path), selfcontained = TRUE)
|
saveWidget(pgg, sub(".pdf$", ".html", plot_path), selfcontained = TRUE)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Function to publish multiple plots
|
# Publish multiple plots
|
||||||
publish_multiple_plots <- function(df, variables, plot_type, out_dir_qc, suffix = "") {
|
publish_multiple_plots <- function(df, variables, plot_type, out_dir_qc, suffix = "") {
|
||||||
lapply(variables, function(var) {
|
lapply(variables, function(var) {
|
||||||
plot <- create_plot(df, var, plot_type)
|
plot <- create_plot(df, var, plot_type)
|
||||||
@@ -167,7 +167,7 @@ publish_multiple_plots <- function(df, variables, plot_type, out_dir_qc, suffix
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
# Function to calculate and publish summary statistics
|
# Calculate and publish summary statistics
|
||||||
publish_summary_stats <- function(df, variables, out_dir) {
|
publish_summary_stats <- function(df, variables, out_dir) {
|
||||||
stats_list <- lapply(variables, function(var) {
|
stats_list <- lapply(variables, function(var) {
|
||||||
df %>%
|
df %>%
|
||||||
@@ -183,7 +183,7 @@ publish_summary_stats <- function(df, variables, out_dir) {
|
|||||||
write.csv(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
|
write.csv(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Function to calculate and publish interaction scores
|
# Calculate and publish interaction scores
|
||||||
publish_interaction_scores <- function(df, out_dir) {
|
publish_interaction_scores <- function(df, out_dir) {
|
||||||
interaction_scores <- df %>%
|
interaction_scores <- df %>%
|
||||||
group_by(OrfRep) %>%
|
group_by(OrfRep) %>%
|
||||||
@@ -207,7 +207,7 @@ publish_interaction_scores <- function(df, out_dir) {
|
|||||||
arrange(l_rank, k_rank), file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE)
|
arrange(l_rank, k_rank), file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Function to publish z-scores
|
# Publish z-scores
|
||||||
publish_zscores <- function(df, out_dir) {
|
publish_zscores <- function(df, out_dir) {
|
||||||
zscores <- df %>%
|
zscores <- df %>%
|
||||||
mutate(
|
mutate(
|
||||||
|
|||||||
@@ -1,238 +1,375 @@
|
|||||||
#!/usr/bin/env Rscript
|
|
||||||
# JoinInteractExps.R
|
|
||||||
|
|
||||||
library("plyr")
|
|
||||||
library("sos")
|
|
||||||
library("dplyr")
|
library("dplyr")
|
||||||
|
|
||||||
args <- commandArgs(TRUE)
|
# Function to parse and set arguments
|
||||||
|
parse_arguments <- function() {
|
||||||
# Set output dir
|
if (interactive()) {
|
||||||
if (length(args) >= 1) {
|
args <- c(
|
||||||
out_dir <- file.path(args[1])
|
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD",
|
||||||
} else {
|
2,
|
||||||
out_dir <- "./" # for legacy workflow
|
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/StudyInfo.csv",
|
||||||
}
|
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/Exp1",
|
||||||
|
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/Exp2"
|
||||||
# Set sd value
|
)
|
||||||
if (length(args) >= 2) {
|
} else {
|
||||||
sd <- as.numeric(args[2])
|
args <- commandArgs(trailingOnly = TRUE)
|
||||||
} else {
|
|
||||||
sd <- 2 # default value
|
|
||||||
}
|
|
||||||
|
|
||||||
sprintf("SD value is: %d", sd)
|
|
||||||
|
|
||||||
# Set study_info file
|
|
||||||
if (length(args) >= 3) {
|
|
||||||
study_info <- file.path(args[3])
|
|
||||||
} else {
|
|
||||||
study_info <- "../Code/StudyInfo.csv" # for legacy workflow
|
|
||||||
}
|
|
||||||
|
|
||||||
studies <- args[4:length(args)]
|
|
||||||
print(studies)
|
|
||||||
input_files <- c()
|
|
||||||
for (i in seq_along(studies)) {
|
|
||||||
study <- studies[i]
|
|
||||||
zs_file <- file.path(study, "zscores", "zscores_interaction.csv")
|
|
||||||
if (file.exists(zs_file)) {
|
|
||||||
input_files[i] <- zs_file
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
rm(zs_file, study)
|
|
||||||
|
|
||||||
# for (var in ls()) {
|
list(
|
||||||
# print(paste(var, ":", get(var)))
|
out_dir = normalizePath(file.path(args[1]), mustWork = FALSE),
|
||||||
|
sd = as.numeric(args[2]),
|
||||||
|
study_info = normalizePath(file.path(args[3]), mustWork = FALSE),
|
||||||
|
input_dirs = args[4:length(args)]
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
args <- parse_arguments()
|
||||||
|
|
||||||
|
# Collect the z-score interaction files that exist under the study directories.
#
# For every entry of `dirs` the candidate path is
# <dir>/zscores/zscores_interaction.csv; only candidates present on disk are
# returned.
#
# @param dirs Character vector of study (experiment) directories.
# @return Unnamed character vector of existing file paths, in input order
#   (length 0 when none exist).
get_zscores_files <- function(dirs) {
  # Drop missing entries (e.g. an NA produced by indexing past the end of the
  # argument vector) so they cannot become a bogus "NA/zscores/..." path.
  dirs <- as.character(dirs[!is.na(dirs)])
  candidates <- file.path(dirs, "zscores", "zscores_interaction.csv")
  # Vectorised filter: the result is always a character vector. Returning NULL
  # from sapply() produced a list as soon as one file was missing, and a list
  # element is not a valid `file` argument for read.csv().
  unname(candidates[file.exists(candidates)])
}
|
||||||
|
|
||||||
|
zscores_files <- get_zscores_files(args$input_dirs)
# args$sd is parsed with as.numeric(); "%d" aborts for a non-integer double
# such as 1.5, so format the value as text instead. print() keeps the message
# visible when the script is source()d as well as under Rscript.
print(sprintf("The SD value is: %s", format(args$sd)))

# Ensure there are enough files to compare
if (length(zscores_files) < 2) stop("Not enough experiments to compare, exiting script")
|
||||||
|
|
||||||
|
# Join the z-score files into one wide table keyed on OrfRep.
#
# The first file is read as the starting table; every further file is joined
# onto the running result with plyr::join() (a left join by default).
#
# @param files Character vector of CSV paths (at least one).
# @return Data frame holding the columns of all files.
join_zscores_files <- function(files) {
  if (length(files) == 0) stop("No zscores files to join")
  joined_data <- read.csv(file = files[1], stringsAsFactors = FALSE)
  remaining <- files[-1]
  # join() is a plyr function and this script only attaches dplyr, so the call
  # is namespace-qualified; that also avoids plyr masking dplyr verbs.
  if (length(remaining) > 0 && !requireNamespace("plyr", quietly = TRUE)) {
    stop("Package 'plyr' is required to join the zscores files")
  }
  for (file in remaining) {
    temp_data <- read.csv(file = file, stringsAsFactors = FALSE)
    joined_data <- plyr::join(joined_data, temp_data, by = "OrfRep")
  }
  joined_data
}
|
||||||
|
|
||||||
|
# Load and join zscores files
|
||||||
|
joined_data <- join_zscores_files(zscores_files)
|
||||||
|
|
||||||
|
# Sort the columns alphabetically by name, then keep the identifier columns
# followed by the K and L z-score / shift columns.
#
# NOTE(review): subsetting columns with `[` presumably also makes duplicated
# names unique (Gene -> Gene.1); the later removal of Gene.N columns relies on
# that — confirm.
#
# @param data Joined data frame.
# @return Data frame with columns in the order OrfRep, Gene*, z_lm_k*,
#   z_shift_k*, z_lm_l*, z_shift_l*.
order_and_select_columns <- function(data) {
  alphabetical <- order(colnames(data))
  select(
    data[, alphabetical],
    contains("OrfRep"),
    matches("Gene"),
    contains("z_lm_k"),
    contains("z_shift_k"),
    contains("z_lm_l"),
    contains("z_shift_l")
  )
}
|
||||||
|
|
||||||
|
selected_headers <- order_and_select_columns(joined_data)
|
||||||
|
|
||||||
|
# Remove the redundant gene-name columns "Gene.1" ... "Gene.<suffixes>".
#
# Columns that are not present are skipped instead of raising an error, so the
# function can be applied to frames that never had the duplicates.
#
# @param data Data frame to clean.
# @param suffixes Number of numbered Gene columns to remove (0 removes none).
# @return `data` without the matching columns; always a data frame.
clean_headers <- function(data, suffixes) {
  suffixes_to_remove <- paste0("Gene.", seq_len(max(suffixes, 0)))
  keep <- !(colnames(data) %in% suffixes_to_remove)
  # drop = FALSE: stay a data frame even when a single column remains.
  data[, keep, drop = FALSE]
}
|
||||||
|
|
||||||
|
headSel <- clean_headers(selected_headers, length(zscores_files) - 1)
# Identifier-only template (OrfRep, Gene). It is taken from headSel, which has
# already been reordered and stripped of the Gene.N duplicates; joined_data
# has not been through the reorder step, so presumably it does not carry the
# Gene.N names that clean_headers() would have to remove.
headSel2 <- select(headSel, contains("OrfRep"), matches("Gene"))
|
||||||
|
|
||||||
|
# Replace missing values in the shift and z_lm columns with small constants.
#
# Column names are matched case-sensitively: names containing "Shift" get
# 0.001, otherwise names containing "Z_lm_" get 0.0001. All other columns are
# returned untouched.
#
# @param data Data frame to patch.
# @return `data` with the NAs of the matching columns filled.
fill_na_in_columns <- function(data) {
  headers <- colnames(data)
  shift_hit <- grepl("Shift", headers)
  # "Shift" takes precedence when a name matches both patterns.
  z_lm_hit <- grepl("Z_lm_", headers) & !shift_hit
  fillers <- ifelse(shift_hit, 0.001, 0.0001)
  for (pos in which(shift_hit | z_lm_hit)) {
    header <- headers[pos]
    values <- data[[header]]
    values[is.na(values)] <- fillers[pos]
    data[[header]] <- values
  }
  data
}
|
||||||
|
|
||||||
|
headSel <- fill_na_in_columns(headSel)
|
||||||
|
|
||||||
|
# Keep the rows in which at least one z_lm column reaches the SD threshold.
#
# A row is kept when abs(value) >= sd holds for any column whose name contains
# "z_lm_" (case-insensitive). Missing scores count as "below threshold".
#
# @param data Data frame containing the z_lm columns (other columns are
#   carried along unchanged).
# @param sd Numeric threshold; 0 disables filtering and returns `data` as is.
# @return The retained rows of `data`; always a data frame. When `data` has no
#   z_lm column and sd != 0, no row can qualify and zero rows are returned.
filter_by_sd <- function(data, sd) {
  if (sd == 0) return(data)

  is_z_lm <- grepl("z_lm_", tolower(colnames(data)), fixed = TRUE)
  if (!any(is_z_lm)) return(data[0, , drop = FALSE])

  z_lm_cols <- data[, is_z_lm, drop = FALSE]
  # na.rm = TRUE: without it one NA score makes the whole row test NA, and
  # indexing with NA inserts an all-NA row into the result.
  filter_vector <- rowSums(abs(z_lm_cols) >= sd, na.rm = TRUE) > 0
  # drop = FALSE: a single-column frame must not collapse to a vector.
  data[filter_vector, , drop = FALSE]
}
|
||||||
|
|
||||||
|
# Apply the SD filter once, on the frame that still holds the z_lm columns,
# and split it afterwards. filter_by_sd() tests the z_lm columns only, so a
# frame restricted to the z_shift columns gives it nothing to test and loses
# every row when sd != 0. Filtering first keeps both outputs on the same rows.
sd_filtered <- filter_by_sd(headSel, args$sd)
REMcRdy <- select(sd_filtered, contains("OrfRep"), matches("Gene"), contains("z_lm_"))
shiftOnly <- select(sd_filtered, contains("OrfRep"), matches("Gene"), contains("z_shift"))
|
||||||
|
|
||||||
|
# Interleave the value columns of two equally shaped frames.
#
# The result starts with the identifier columns of `data1`, followed by
# data2[i], data1[i] for every value column position i, i.e.
# id..., shift_1, z_lm_1, shift_2, z_lm_2, ...
#
# @param data1 Data frame whose value columns come second in each pair
#   (the z_lm table).
# @param data2 Data frame whose value columns come first in each pair
#   (the shift table); must have the same number of rows as `data1`.
# @param id_cols Number of leading identifier columns (default 2: OrfRep, Gene).
# @return Data frame with the identifier columns and the interleaved pairs.
reorder_columns <- function(data1, data2, id_cols = 2) {
  if (nrow(data1) != nrow(data2)) {
    stop("data1 and data2 must have the same number of rows")
  }
  n_ids <- min(id_cols, ncol(data1))
  # Start from the identifier columns only; starting from all of data1 would
  # repeat every data1 value column in the output.
  combined_data <- data1[seq_len(n_ids)]
  # setdiff() on seq_len() ranges is empty when there are no value columns,
  # whereas 3:ncol(data1) counts backwards (3, 2) for a two-column frame.
  shared <- seq_len(min(ncol(data1), ncol(data2)))
  for (i in setdiff(shared, seq_len(n_ids))) {
    combined_data <- cbind(combined_data, data2[i], data1[i])
  }
  combined_data
}
|
||||||
|
|
||||||
|
# Interleave the unfiltered z_lm and z_shift frames, which share headSel's
# rows. The identifier-only template headSel2 has no value columns to walk
# through, and it would not line up row-for-row with SD-filtered tables.
combI <- reorder_columns(
  select(headSel, contains("OrfRep"), matches("Gene"), contains("z_lm_")),
  select(headSel, contains("OrfRep"), matches("Gene"), contains("z_shift"))
)
|
||||||
|
|
||||||
|
# Write output files
|
||||||
|
write.csv(REMcRdy, file.path(args$out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
|
||||||
|
write.csv(shiftOnly, file.path(args$out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
|
||||||
|
|
||||||
|
# Replace the numeric join suffix of a header with an experiment name.
#
# A header ending in ".<k>" belongs to the (k + 1)-th joined file: the columns
# of the first file keep their bare names, ".1" marks the second file, and so
# on. The suffix is therefore looked up in row k + 1 of `labels`, matching the
# legacy relabelling where ".1" used row 2. Headers without a suffix, with a
# suffix beyond the rows of `labels`, or whose label is missing are returned
# unchanged.
#
# @param headers Character vector of column names.
# @param labels Data frame read from the study info file; column 2 holds the
#   experiment names, one row per experiment in join order.
# @return Character vector the same length as `headers`.
relabel_headers <- function(headers, labels) {
  new_labels <- headers
  suffix_pattern <- "\\.(\\d+)$"
  for (i in which(grepl(suffix_pattern, headers))) {
    suffix <- suppressWarnings(
      as.integer(sub(paste0("^.*", suffix_pattern), "\\1", headers[i]))
    )
    if (is.na(suffix) || suffix < 1L) next
    label_row <- suffix + 1L
    if (label_row > nrow(labels)) next
    exp_name <- labels[label_row, 2]
    if (is.na(exp_name)) next
    # Strip only the trailing suffix, then append the name with paste0() so
    # that neither the "." wildcard nor special characters in the experiment
    # name are interpreted as regex syntax.
    stem <- sub(suffix_pattern, "", headers[i])
    new_labels[i] <- paste0(stem, "_", exp_name)
  }
  new_labels
}
|
||||||
|
|
||||||
|
# Swap the numeric join suffixes in both tables for experiment names taken
# from the study info file.
LabelStd <- read.csv(file = args$study_info, stringsAsFactors = FALSE)
names(shiftOnly) <- relabel_headers(names(shiftOnly), LabelStd)
names(REMcRdy) <- relabel_headers(names(REMcRdy), LabelStd)

# Save the relabelled tables; quote = FALSE keeps headers and values unquoted.
relabelled_tables <- list(REMcRdy_lm_only.csv = REMcRdy, Shift_only.csv = shiftOnly)
for (table_file in names(relabelled_tables)) {
  write.csv(
    relabelled_tables[[table_file]],
    file.path(args$out_dir, table_file),
    row.names = FALSE,
    quote = FALSE
  )
}

# Record the SD value in the 4th column of the study info table, then write it
# both to parameters.csv in the output directory and back over the study info
# file itself.
LabelStd[, 4] <- args$sd
for (parameter_file in c(file.path(args$out_dir, "parameters.csv"), args$study_info)) {
  write.csv(LabelStd, file = parameter_file, row.names = FALSE)
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# library("plyr")
|
||||||
|
# library("sos")
|
||||||
|
# library("dplyr")
|
||||||
|
|
||||||
|
# # Function to parse and set arguments
|
||||||
|
# parse_arguments <- function() {
|
||||||
|
# if (interactive()) {
|
||||||
|
# args <- c(
|
||||||
|
# "./", # Default out_dir
|
||||||
|
# "2", # Default SD value
|
||||||
|
# "../Code/StudyInfo.csv" # Default study_info path
|
||||||
|
# )
|
||||||
|
# } else {
|
||||||
|
# args <- commandArgs(trailingOnly = TRUE)
|
||||||
# }
|
# }
|
||||||
|
|
||||||
# print(input_files)
|
# list(
|
||||||
# print(length(input_files))
|
# out_dir = normalizePath(file.path(args[1]), mustWork = FALSE),
|
||||||
|
# sd = as.numeric(args[2]),
|
||||||
|
# study_info = normalizePath(file.path(args[3]), mustWork = FALSE),
|
||||||
|
# input_dirs = args[4:length(args)]
|
||||||
|
# )
|
||||||
|
# }
|
||||||
|
|
||||||
# TODO this is better handled in a loop in case you want to compare more experiments?
|
# args <- parse_arguments()
|
||||||
# The input is already designed for this
|
|
||||||
# Read in the files for your experiment and
|
|
||||||
# Join the two files at a time as a function of how many inputFile
|
|
||||||
# list the larger file first ? in this example X2 has the larger number of genes
|
|
||||||
# If X1 has a larger number of genes, switch the order of X1 and X2
|
|
||||||
if (length(input_files) < 2) {
|
|
||||||
print("Note enough Exps to compare, skipping join")
|
|
||||||
stop("Exiting script")
|
|
||||||
}
|
|
||||||
|
|
||||||
if (length(input_files) >= 2) {
|
# # Create an array for the zscores files
|
||||||
X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE)
|
# zscores_files <- sapply(args$input_dirs, function(study) {
|
||||||
X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE)
|
# file_path <- file.path(study, "zscores", "zscores_interaction.csv")
|
||||||
X <- join(X1, X2, by = "OrfRep")
|
# if (file.exists(file_path)) file_path else NULL
|
||||||
OBH <- X[, order(colnames(X))] # OrderByHeader
|
# }, simplify = TRUE, USE.NAMES = FALSE)
|
||||||
headers <- select(OBH, contains("OrfRep"), matches("Gene"),
|
|
||||||
contains("z_lm_k"), contains("z_shift_k"), contains("z_lm_l"), contains("z_shift_l"))
|
|
||||||
headSel <- select(headers, -"Gene.1") # remove "Gene.1 column
|
|
||||||
headSel2 <- select(OBH, contains("OrfRep"), matches("Gene")) # frame for interleaving Z_lm with Shift colums
|
|
||||||
headSel2 <- select(headSel2, -"Gene.1") # remove "Gene.1 column # frame for interleaving Z_lm with Shift colums
|
|
||||||
}
|
|
||||||
|
|
||||||
if (length(input_files) >= 3) {
|
# # Filter out NULL entries
|
||||||
X3 <- read.csv(file = input_files[3], stringsAsFactors = FALSE)
|
# zscores_files <- zscores_files[!sapply(zscores_files, is.null)]
|
||||||
X <- join(X, X3, by = "OrfRep")
|
|
||||||
headSel <- select(headers, -"Gene.1", -"Gene.2")
|
|
||||||
headSel2 <- select(OBH, contains("OrfRep"), matches("Gene"))
|
|
||||||
headSel2 <- select(headSel2, -"Gene.1", -"Gene.2")
|
|
||||||
}
|
|
||||||
|
|
||||||
if (length(input_files) >= 4) {
|
# sprintf("The SD value is: %d", args$sd)
|
||||||
X4 <- read.csv(file = input_files[4], stringsAsFactors = FALSE)
|
# # print(args$input_dirs)
|
||||||
X <- join(X, X4, by = "OrfRep")
|
|
||||||
headSel <- select(headers, -"Gene.1", -"Gene.2", -"Gene.3")
|
|
||||||
headSel2 <- select(OBH, contains("OrfRep"), matches("Gene"))
|
|
||||||
headSel2 <- select(headSel2, -"Gene.1", -"Gene.2", -"Gene.3")
|
|
||||||
}
|
|
||||||
|
|
||||||
# print(headers)
|
# # TODO this is better handled in a loop in case you want to compare more experiments?
|
||||||
# headSel$contains("Z_Shift") %>% replace_na(0.001)
|
# # The input is already designed for this
|
||||||
headers <- colnames(headSel)
|
# # Read in the files for your experiment and
|
||||||
# print(headers)
|
# # Join the two files at a time as a function of how many inputFile
|
||||||
i <- 0
|
# # list the larger file first ? in this example X2 has the larger number of genes
|
||||||
for (i in 1:length(headers)) {
|
# # If X1 has a larger number of genes, switch the order of X1 and X2
|
||||||
if (grepl("Shift", headers[i])) {
|
# if (length(zscores_files) < 2) {
|
||||||
headSel[headers[i]][is.na(headSel[headers[i]])] <- 0.001
|
# print("Note enough Exps to compare, skipping join")
|
||||||
}
|
# stop("Exiting script")
|
||||||
if (grepl("Z_lm_", headers[i])) {
|
# }
|
||||||
headSel[headers[i]][is.na(headSel[headers[i]])] <- 0.0001
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# 2SD option code to exclude Z_lm values less than 2 standard Deviations
|
# if (length(zscores_files) >= 2) {
|
||||||
REMcRdy <- select(headSel, contains("OrfRep"), matches("Gene"), contains("z_lm_"))
|
# X1 <- read.csv(file = zscores_files[1], stringsAsFactors = FALSE)
|
||||||
shiftOnly <- select(headSel, contains("OrfRep"), matches("Gene"), contains("z_shift"))
|
# X2 <- read.csv(file = zscores_files[2], stringsAsFactors = FALSE)
|
||||||
|
# X <- join(X1, X2, by = "OrfRep")
|
||||||
|
# OBH <- X[, order(colnames(X))] # OrderByHeader
|
||||||
|
# headers <- select(OBH, contains("OrfRep"), matches("Gene"),
|
||||||
|
# contains("z_lm_k"), contains("z_shift_k"), contains("z_lm_l"), contains("z_shift_l"))
|
||||||
|
# headSel <- select(headers, -"Gene.1") # remove "Gene.1 column
|
||||||
|
# headSel2 <- select(OBH, contains("OrfRep"), matches("Gene")) # frame for interleaving Z_lm with Shift colums
|
||||||
|
# headSel2 <- select(headSel2, -"Gene.1") # remove "Gene.1 column # frame for interleaving Z_lm with Shift colums
|
||||||
|
# }
|
||||||
|
|
||||||
# Code to replace the numeric (.1 .2 .3) headers with experiment names from StudyInfo.txt
|
# if (length(zscores_files) >= 3) {
|
||||||
Labels <- read.csv(file = study_info, stringsAsFactors = FALSE, sep = ",")
|
# X3 <- read.csv(file = zscores_files[3], stringsAsFactors = FALSE)
|
||||||
|
# X <- join(X, X3, by = "OrfRep")
|
||||||
|
# headSel <- select(headers, -"Gene.1", -"Gene.2")
|
||||||
|
# headSel2 <- select(OBH, contains("OrfRep"), matches("Gene"))
|
||||||
|
# headSel2 <- select(headSel2, -"Gene.1", -"Gene.2")
|
||||||
|
# }
|
||||||
|
|
||||||
# Using Text search grepl to relabel headers
|
# if (length(zscores_files) >= 4) {
|
||||||
REMcRdyHdr <- colnames(REMcRdy)
|
# X4 <- read.csv(file = zscores_files[4], stringsAsFactors = FALSE)
|
||||||
REMcRdyLabels <- "asdf"
|
# X <- join(X, X4, by = "OrfRep")
|
||||||
shftHdr <- colnames(shiftOnly)
|
# headSel <- select(headers, -"Gene.1", -"Gene.2", -"Gene.3")
|
||||||
shiftLabels <- "asdf"
|
# headSel2 <- select(OBH, contains("OrfRep"), matches("Gene"))
|
||||||
shiftLabels[1:2] <- shftHdr[1:2]
|
# headSel2 <- select(headSel2, -"Gene.1", -"Gene.2", -"Gene.3")
|
||||||
REMcRdyLabels[1:2] <- REMcRdyHdr[1:2]
|
# }
|
||||||
|
|
||||||
for (i in 3:(length(shftHdr))) {
|
# # print(headers)
|
||||||
if (i == 3) {
|
# # headSel$contains("Z_Shift") %>% replace_na(0.001)
|
||||||
shiftLabels[3] <- paste0(Labels[1, 2], ".", shftHdr[3])
|
# headers <- colnames(headSel)
|
||||||
REMcRdyLabels[3] <- paste0(Labels[1, 2], ".", REMcRdyHdr[3])
|
# # print(headers)
|
||||||
}
|
# i <- 0
|
||||||
if (i == 5) {
|
# for (i in 1:length(headers)) {
|
||||||
shiftLabels[5] <- paste0(Labels[1, 2], ".", shftHdr[5])
|
# if (grepl("Shift", headers[i])) {
|
||||||
REMcRdyLabels[5] <- paste0(Labels[1, 2], ".", REMcRdyHdr[5])
|
# headSel[headers[i]][is.na(headSel[headers[i]])] <- 0.001
|
||||||
}
|
# }
|
||||||
if (i == 7) {
|
# if (grepl("Z_lm_", headers[i])) {
|
||||||
shiftLabels[7] <- paste0(Labels[1, 2], ".", shftHdr[7])
|
# headSel[headers[i]][is.na(headSel[headers[i]])] <- 0.0001
|
||||||
REMcRdyLabels[7] <- paste0(Labels[1, 2], ".", REMcRdyHdr[7])
|
# }
|
||||||
}
|
# }
|
||||||
if (grepl(".1", shftHdr[i], fixed = true)) {
|
|
||||||
shiftLabels[i] <- paste0(Labels[2, 2], ".", shftHdr[i])
|
|
||||||
REMcRdyLabels[i] <- paste0(Labels[2, 2], ".", REMcRdyHdr[i])
|
|
||||||
}
|
|
||||||
if (grepl(".2", shftHdr[i], fixed = true)) {
|
|
||||||
shiftLabels[i] < -paste0(Labels[3, 2], ".", shftHdr[i])
|
|
||||||
REMcRdyLabels[i] <- paste0(Labels[3, 2], ".", REMcRdyHdr[i])
|
|
||||||
}
|
|
||||||
if (grepl(".3", shftHdr[i], fixed = true)) {
|
|
||||||
shiftLabels[i] <- paste0(Labels[4, 2], ".", shftHdr[i])
|
|
||||||
REMcRdyLabels[i] <- paste0(Labels[4, 2], ".", REMcRdyHdr[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i in 3:(length(REMcRdyLabels))) {
|
# # 2SD option code to exclude Z_lm values less than 2 standard Deviations
|
||||||
j <- as.integer(i)
|
# REMcRdy <- select(headSel, contains("OrfRep"), matches("Gene"), contains("z_lm_"))
|
||||||
REMcRdyLabels[j] <- gsub("[.]", "_", REMcRdyLabels[j])
|
# shiftOnly <- select(headSel, contains("OrfRep"), matches("Gene"), contains("z_shift"))
|
||||||
shiftLabels[j] <- gsub("[.]", "_", shiftLabels[j])
|
|
||||||
}
|
|
||||||
|
|
||||||
colnames(shiftOnly) <- shiftLabels
|
# # Code to replace the numeric (.1 .2 .3) headers with experiment names from StudyInfo.txt
|
||||||
colnames(REMcRdy) <- REMcRdyLabels
|
# Labels <- read.csv(file = study_info, stringsAsFactors = FALSE, sep = ",")
|
||||||
|
|
||||||
combI <- headSel2 # starting Template orf, Genename columns
|
# # Using Text search grepl to relabel headers
|
||||||
|
# REMcRdyHdr <- colnames(REMcRdy)
|
||||||
|
# REMcRdyLabels <- "asdf"
|
||||||
|
# shftHdr <- colnames(shiftOnly)
|
||||||
|
# shiftLabels <- "asdf"
|
||||||
|
# shiftLabels[1:2] <- shftHdr[1:2]
|
||||||
|
# REMcRdyLabels[1:2] <- REMcRdyHdr[1:2]
|
||||||
|
|
||||||
# headersRemc<-colnames(REMcRdy)
|
# for (i in 3:(length(shftHdr))) {
|
||||||
# Reorder columns to produce an interleaved set of Z_lm and Shift data for all the cpps.
|
# if (i == 3) {
|
||||||
for (i in 3:length(colnames(REMcRdy))) {
|
# shiftLabels[3] <- paste0(Labels[1, 2], ".", shftHdr[3])
|
||||||
combI <- cbind.data.frame(combI, shiftOnly[i])
|
# REMcRdyLabels[3] <- paste0(Labels[1, 2], ".", REMcRdyHdr[3])
|
||||||
combI <- cbind.data.frame(combI, REMcRdy[i])
|
# }
|
||||||
}
|
# if (i == 5) {
|
||||||
|
# shiftLabels[5] <- paste0(Labels[1, 2], ".", shftHdr[5])
|
||||||
|
# REMcRdyLabels[5] <- paste0(Labels[1, 2], ".", REMcRdyHdr[5])
|
||||||
|
# }
|
||||||
|
# if (i == 7) {
|
||||||
|
# shiftLabels[7] <- paste0(Labels[1, 2], ".", shftHdr[7])
|
||||||
|
# REMcRdyLabels[7] <- paste0(Labels[1, 2], ".", REMcRdyHdr[7])
|
||||||
|
# }
|
||||||
|
# if (grepl(".1", shftHdr[i], fixed = true)) {
|
||||||
|
# shiftLabels[i] <- paste0(Labels[2, 2], ".", shftHdr[i])
|
||||||
|
# REMcRdyLabels[i] <- paste0(Labels[2, 2], ".", REMcRdyHdr[i])
|
||||||
|
# }
|
||||||
|
# if (grepl(".2", shftHdr[i], fixed = true)) {
|
||||||
|
# shiftLabels[i] < -paste0(Labels[3, 2], ".", shftHdr[i])
|
||||||
|
# REMcRdyLabels[i] <- paste0(Labels[3, 2], ".", REMcRdyHdr[i])
|
||||||
|
# }
|
||||||
|
# if (grepl(".3", shftHdr[i], fixed = true)) {
|
||||||
|
# shiftLabels[i] <- paste0(Labels[4, 2], ".", shftHdr[i])
|
||||||
|
# REMcRdyLabels[i] <- paste0(Labels[4, 2], ".", REMcRdyHdr[i])
|
||||||
|
# }
|
||||||
|
# }
|
||||||
|
|
||||||
Vec1 <- NA
|
# for (i in 3:(length(REMcRdyLabels))) {
|
||||||
Vec2 <- NA
|
# j <- as.integer(i)
|
||||||
Vec3 <- NA
|
# REMcRdyLabels[j] <- gsub("[.]", "_", REMcRdyLabels[j])
|
||||||
Vec4 <- NA
|
# shiftLabels[j] <- gsub("[.]", "_", shiftLabels[j])
|
||||||
Vec5 <- NA
|
# }
|
||||||
Vec6 <- NA
|
|
||||||
Vec7 <- NA
|
|
||||||
Vec8 <- NA
|
|
||||||
|
|
||||||
if (length(REMcRdy) == 6) {
|
# colnames(shiftOnly) <- shiftLabels
|
||||||
Vec1 <- abs(REMcRdy[, 3]) >= sd
|
# colnames(REMcRdy) <- REMcRdyLabels
|
||||||
Vec2 <- abs(REMcRdy[, 4]) >= sd
|
|
||||||
Vec3 <- abs(REMcRdy[, 5]) >= sd
|
|
||||||
Vec4 <- abs(REMcRdy[, 6]) >= sd
|
|
||||||
bolVec <- Vec1 | Vec2 | Vec3 | Vec4
|
|
||||||
REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
|
|
||||||
REMcRdyGT2[, 3:6] <- REMcRdy[bolVec, 3:6]
|
|
||||||
shiftOnlyGT2 <- shiftOnly[bolVec, 1:2]
|
|
||||||
shiftOnlyGT2[, 3:6] <- shiftOnly[bolVec, 3:6]
|
|
||||||
}
|
|
||||||
|
|
||||||
if (length(REMcRdy) == 8) {
|
# combI <- headSel2 # starting Template orf, Genename columns
|
||||||
Vec1 <- abs(REMcRdy[, 3]) >= sd
|
|
||||||
Vec2 <- abs(REMcRdy[, 4]) >= sd
|
|
||||||
Vec3 <- abs(REMcRdy[, 5]) >= sd
|
|
||||||
Vec4 <- abs(REMcRdy[, 6]) >= sd
|
|
||||||
Vec5 <- abs(REMcRdy[, 7]) >= sd
|
|
||||||
Vec6 <- abs(REMcRdy[, 8]) >= sd
|
|
||||||
bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6
|
|
||||||
REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
|
|
||||||
REMcRdyGT2[, 3:8] <- REMcRdy[bolVec, 3:8]
|
|
||||||
shiftOnlyGT2 <- shiftOnly[bolVec, 1:2]
|
|
||||||
shiftOnlyGT2[, 3:8] <- shiftOnly[bolVec, 3:8]
|
|
||||||
}
|
|
||||||
|
|
||||||
if (length(REMcRdy) == 10) {
|
# # headersRemc<-colnames(REMcRdy)
|
||||||
Vec1 <- abs(REMcRdy[, 3]) >= sd
|
# # Reorder columns to produce an interleaved set of Z_lm and Shift data for all the cpps.
|
||||||
Vec2 <- abs(REMcRdy[, 4]) >= sd
|
# for (i in 3:length(colnames(REMcRdy))) {
|
||||||
Vec3 <- abs(REMcRdy[, 5]) >= sd
|
# combI <- cbind.data.frame(combI, shiftOnly[i])
|
||||||
Vec4 <- abs(REMcRdy[, 6]) >= sd
|
# combI <- cbind.data.frame(combI, REMcRdy[i])
|
||||||
Vec5 <- abs(REMcRdy[, 7]) >= sd
|
# }
|
||||||
Vec6 <- abs(REMcRdy[, 8]) >= sd
|
|
||||||
Vec7 <- abs(REMcRdy[, 9]) >= sd
|
|
||||||
Vec8 <- abs(REMcRdy[, 10]) >= sd
|
|
||||||
bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6 | Vec7 | Vec8
|
|
||||||
REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
|
|
||||||
REMcRdyGT2[, 3:10] <- REMcRdy[bolVec, 3:10]
|
|
||||||
shiftOnlyGT2 <- shiftOnly[bolVec, 1:2]
|
|
||||||
shiftOnlyGT2[, 3:10] <- shiftOnly[bolVec, 3:10]
|
|
||||||
}
|
|
||||||
|
|
||||||
if (sd != 0) {
|
# Vec1 <- NA
|
||||||
REMcRdy <- REMcRdyGT2 # [,2:length(REMcRdyGT2)]
|
# Vec2 <- NA
|
||||||
shiftOnly <- shiftOnlyGT2 # [,2:length(shiftOnlyGT2)]
|
# Vec3 <- NA
|
||||||
}
|
# Vec4 <- NA
|
||||||
|
# Vec5 <- NA
|
||||||
|
# Vec6 <- NA
|
||||||
|
# Vec7 <- NA
|
||||||
|
# Vec8 <- NA
|
||||||
|
|
||||||
if (sd == 0) {
|
# if (length(REMcRdy) == 6) {
|
||||||
REMcRdy <- REMcRdy # [,2:length(REMcRdy)]
|
# Vec1 <- abs(REMcRdy[, 3]) >= sd
|
||||||
shiftOnly <- shiftOnly # [,2:length(shiftOnly)]
|
# Vec2 <- abs(REMcRdy[, 4]) >= sd
|
||||||
}
|
# Vec3 <- abs(REMcRdy[, 5]) >= sd
|
||||||
|
# Vec4 <- abs(REMcRdy[, 6]) >= sd
|
||||||
|
# bolVec <- Vec1 | Vec2 | Vec3 | Vec4
|
||||||
|
# REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
|
||||||
|
# REMcRdyGT2[, 3:6] <- REMcRdy[bolVec, 3:6]
|
||||||
|
# shiftOnlyGT2 <- shiftOnly[bolVec, 1:2]
|
||||||
|
# shiftOnlyGT2[, 3:6] <- shiftOnly[bolVec, 3:6]
|
||||||
|
# }
|
||||||
|
|
||||||
# R places hidden "" around the header names. The following
|
# if (length(REMcRdy) == 8) {
|
||||||
# is intended to remove those quote so that the "" do not blow up the Java REMc.
|
# Vec1 <- abs(REMcRdy[, 3]) >= sd
|
||||||
# Use ,quote=F in the write.csv statement to fix R output file.
|
# Vec2 <- abs(REMcRdy[, 4]) >= sd
|
||||||
# write.csv(combI,file.path(out_dir,"CombinedKLzscores.csv"), row.names = FALSE)
|
# Vec3 <- abs(REMcRdy[, 5]) >= sd
|
||||||
write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
|
# Vec4 <- abs(REMcRdy[, 6]) >= sd
|
||||||
write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
|
# Vec5 <- abs(REMcRdy[, 7]) >= sd
|
||||||
# LabelStd <- read.table(file="./parameters.csv",stringsAsFactors = FALSE,sep = ",")
|
# Vec6 <- abs(REMcRdy[, 8]) >= sd
|
||||||
|
# bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6
|
||||||
|
# REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
|
||||||
|
# REMcRdyGT2[, 3:8] <- REMcRdy[bolVec, 3:8]
|
||||||
|
# shiftOnlyGT2 <- shiftOnly[bolVec, 1:2]
|
||||||
|
# shiftOnlyGT2[, 3:8] <- shiftOnly[bolVec, 3:8]
|
||||||
|
# }
|
||||||
|
|
||||||
LabelStd <- read.csv(file = study_info, stringsAsFactors = FALSE)
|
# if (length(REMcRdy) == 10) {
|
||||||
# print(sd)
|
# Vec1 <- abs(REMcRdy[, 3]) >= sd
|
||||||
LabelStd[, 4] <- as.numeric(sd)
|
# Vec2 <- abs(REMcRdy[, 4]) >= sd
|
||||||
write.csv(LabelStd, file = file.path(out_dir, "parameters.csv"), row.names = FALSE)
|
# Vec3 <- abs(REMcRdy[, 5]) >= sd
|
||||||
write.csv(LabelStd, file = study_info, row.names = FALSE)
|
# Vec4 <- abs(REMcRdy[, 6]) >= sd
|
||||||
|
# Vec5 <- abs(REMcRdy[, 7]) >= sd
|
||||||
|
# Vec6 <- abs(REMcRdy[, 8]) >= sd
|
||||||
|
# Vec7 <- abs(REMcRdy[, 9]) >= sd
|
||||||
|
# Vec8 <- abs(REMcRdy[, 10]) >= sd
|
||||||
|
# bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6 | Vec7 | Vec8
|
||||||
|
# REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
|
||||||
|
# REMcRdyGT2[, 3:10] <- REMcRdy[bolVec, 3:10]
|
||||||
|
# shiftOnlyGT2 <- shiftOnly[bolVec, 1:2]
|
||||||
|
# shiftOnlyGT2[, 3:10] <- shiftOnly[bolVec, 3:10]
|
||||||
|
# }
|
||||||
|
|
||||||
|
# if (sd != 0) {
|
||||||
|
# REMcRdy <- REMcRdyGT2 # [,2:length(REMcRdyGT2)]
|
||||||
|
# shiftOnly <- shiftOnlyGT2 # [,2:length(shiftOnlyGT2)]
|
||||||
|
# }
|
||||||
|
|
||||||
|
# if (sd == 0) {
|
||||||
|
# REMcRdy <- REMcRdy # [,2:length(REMcRdy)]
|
||||||
|
# shiftOnly <- shiftOnly # [,2:length(shiftOnly)]
|
||||||
|
# }
|
||||||
|
|
||||||
|
# # R places hidden "" around the header names. The following
|
||||||
|
# # is intended to remove those quote so that the "" do not blow up the Java REMc.
|
||||||
|
# # Use ,quote=F in the write.csv statement to fix R output file.
|
||||||
|
# # write.csv(combI,file.path(out_dir,"CombinedKLzscores.csv"), row.names = FALSE)
|
||||||
|
# write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
|
||||||
|
# write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
|
||||||
|
# # LabelStd <- read.table(file="./parameters.csv",stringsAsFactors = FALSE,sep = ",")
|
||||||
|
|
||||||
|
# LabelStd <- read.csv(file = study_info, stringsAsFactors = FALSE)
|
||||||
|
# # print(sd)
|
||||||
|
# LabelStd[, 4] <- as.numeric(sd)
|
||||||
|
# write.csv(LabelStd, file = file.path(out_dir, "parameters.csv"), row.names = FALSE)
|
||||||
|
# write.csv(LabelStd, file = study_info, row.names = FALSE)
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user