Add more informative output

This commit is contained in:
2024-09-01 17:33:40 -04:00
parent c6c4870d46
commit e1f57ff7d7

View File

@@ -127,7 +127,7 @@ load_and_process_data <- function(easy_results_file, sd = 3) {
OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep), OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep),
conc_num = as.numeric(gsub("[^0-9\\.]", "", Conc)), conc_num = as.numeric(gsub("[^0-9\\.]", "", Conc)),
conc_num_factor = as.numeric(as.factor(conc_num)) - 1) conc_num_factor = as.numeric(as.factor(conc_num)) - 1)
return(df) return(df)
} }
@@ -137,7 +137,7 @@ update_gene_names <- function(df, sgd_gene_list) {
genes <- read.delim(file = sgd_gene_list, genes <- read.delim(file = sgd_gene_list,
quote = "", header = FALSE, quote = "", header = FALSE,
colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11))) colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11)))
# Create a named vector for mapping ORF to GeneName # Create a named vector for mapping ORF to GeneName
gene_map <- setNames(genes$V5, genes$V4) gene_map <- setNames(genes$V5, genes$V4)
@@ -150,7 +150,7 @@ update_gene_names <- function(df, sgd_gene_list) {
# Ensure Gene is not left blank or incorrectly updated to "OCT1" # Ensure Gene is not left blank or incorrectly updated to "OCT1"
df <- df %>% df <- df %>%
mutate(Gene = ifelse(updated_genes == "" | updated_genes == "OCT1", OrfRep, updated_genes)) mutate(Gene = ifelse(updated_genes == "" | updated_genes == "OCT1", OrfRep, updated_genes))
return(df) return(df)
} }
@@ -193,21 +193,21 @@ generate_plot <- function(df, x_var, y_var = NULL, plot_type, color_var = "conc_
generate_and_save_plots <- function(df, output_dir, prefix, variables, include_qc = FALSE) { generate_and_save_plots <- function(df, output_dir, prefix, variables, include_qc = FALSE) {
plots <- list() plots <- list()
if (nrow(df) == 0) { if (nrow(df) == 0) {
message("The \"", deparse(substitute(df)), "\" dataframe is empty, skipping plots") message("The \"", deparse(substitute(df)), "\" dataframe is empty, skipping plots")
return() return()
} }
message("Generating plots for \"", deparse(substitute(df)), "\" dataframe") message("Generating plots for \"", deparse(substitute(df)), "\" dataframe")
for (var in variables) { for (var in variables) {
scatter_plot <- scatter_plot <-
generate_plot(df, x_var = "scan", y_var = var, plot_type = "scatter", generate_plot(df, x_var = "scan", y_var = var, plot_type = "scatter",
title = paste(prefix, "Scatter Plot for", var)) title = paste(prefix, "Scatter Plot for", var))
boxplot <- boxplot <-
generate_plot(df, x_var = "scan", y_var = var, plot_type = "box", generate_plot(df, x_var = "scan", y_var = var, plot_type = "box",
title = paste(prefix, "Box Plot for", var)) title = paste(prefix, "Box Plot for", var))
plots[[paste0(var, "_scatter")]] <- scatter_plot plots[[paste0(var, "_scatter")]] <- scatter_plot
plots[[paste0(var, "_box")]] <- boxplot plots[[paste0(var, "_box")]] <- boxplot
@@ -216,13 +216,13 @@ generate_and_save_plots <- function(df, output_dir, prefix, variables, include_q
if (include_qc) { if (include_qc) {
plots[["Raw_L_vs_K"]] <- plots[["Raw_L_vs_K"]] <-
generate_plot(df, x_var = "L", y_var = "K", plot_type = "scatter", generate_plot(df, x_var = "L", y_var = "K", plot_type = "scatter",
title = "Raw L vs K before QC") title = "Raw L vs K before QC")
plots[["Delta_bg_Density"]] <- plots[["Delta_bg_Density"]] <-
generate_plot(df, x_var = "delta_bg", plot_type = "density", color_var = "conc_num", generate_plot(df, x_var = "delta_bg", plot_type = "density", color_var = "conc_num",
title = "Density plot for Delta Background by Conc All Data") title = "Density plot for Delta Background by Conc All Data")
plots[["Delta_bg_Bar"]] <- plots[["Delta_bg_Bar"]] <-
generate_plot(df, x_var = "delta_bg", plot_type = "bar", generate_plot(df, x_var = "delta_bg", plot_type = "bar",
title = "Bar plot for Delta Background by Conc All Data") title = "Bar plot for Delta Background by Conc All Data")
} }
save_plots(prefix, plots, output_dir) save_plots(prefix, plots, output_dir)
@@ -233,6 +233,10 @@ calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "c
summary_stats <- df %>% summary_stats <- df %>%
group_by(across(all_of(group_vars))) %>% group_by(across(all_of(group_vars))) %>%
summarise(across(all_of(variables), list( summarise(across(all_of(variables), list(
N = ~{
message("Calculating summary statistics for ", cur_column())
n()
},
mean = ~mean(.x, na.rm = TRUE), mean = ~mean(.x, na.rm = TRUE),
median = ~median(.x, na.rm = TRUE), median = ~median(.x, na.rm = TRUE),
max = ~max(.x, na.rm = TRUE), max = ~max(.x, na.rm = TRUE),
@@ -284,11 +288,10 @@ save_plots <- function(file_name, plot_list, output_dir) {
message("Error in plot: ", plot_name, "\n", e) message("Error in plot: ", plot_name, "\n", e)
return(NULL) return(NULL)
}) })
if (!is.null(pgg)) { if (!is.null(pgg)) {
saveWidget(pgg, saveWidget(pgg,
file = file.path(output_dir, file = file.path(output_dir,
paste0(file_name, "_", plot_name, ".html")), paste0(file_name, "_", plot_name, ".html")),
selfcontained = TRUE) selfcontained = TRUE)
} }
}) })
@@ -325,13 +328,13 @@ process_strains <- function(df, l_within_2sd_k, strain, output_dir) {
} }
df_strains <- bind_rows(df_strains, df_temp) # Append the results of this concentration df_strains <- bind_rows(df_strains, df_temp) # Append the results of this concentration
} }
return(df_strains) return(df_strains)
} }
calculate_interaction_scores <- function(df, df_stats_by_l, df_stats_by_k, calculate_interaction_scores <- function(df, df_stats_by_l, df_stats_by_k, df_stats_by_r, df_stats_by_auc,
df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) { background_means, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) {
# Calculate all necessary statistics and shifts in one step # Calculate all necessary statistics and shifts in one step
interaction_scores_all <- df %>% interaction_scores_all <- df %>%
group_by(across(all_of(group_vars)), conc_num, conc_num_factor) %>% group_by(across(all_of(group_vars)), conc_num, conc_num_factor) %>%
@@ -400,7 +403,7 @@ calculate_interaction_scores <- function(df, df_stats_by_l, df_stats_by_k,
SM = sum(SM, na.rm = TRUE) SM = sum(SM, na.rm = TRUE)
) %>% ) %>%
ungroup() ungroup()
# Ensure interaction_scores has only the specified columns # Ensure interaction_scores has only the specified columns
interaction_scores <- interaction_scores %>% interaction_scores <- interaction_scores %>%
select(Gene, raw_shift_l, z_shift_l, lm_Score_L, Z_lm_L = lm_Score_L, R_Squared_L, sum_z_score_l, avg_zscore_l, select(Gene, raw_shift_l, z_shift_l, lm_Score_L, Z_lm_L = lm_Score_L, R_Squared_L, sum_z_score_l, avg_zscore_l,
@@ -436,7 +439,7 @@ generate_rf_plots <- function(df_calculations, df_interactions, output_dir, file
labels = unique(as.character(df_calculations$conc_num))) + labels = unique(as.character(df_calculations$conc_num))) +
theme_publication() theme_publication()
}) })
save_plots(file_prefix, plot_list, output_dir) save_plots(file_prefix, plot_list, output_dir)
} }
@@ -445,7 +448,7 @@ generate_summary_plots <- function(df, output_dir) {
plot_list <- lapply(variables, function(var) { plot_list <- lapply(variables, function(var) {
generate_plot(df, x_var = "conc_num_factor", y_var = var, plot_type = "scatter", title = paste("Summary Plot for", var)) generate_plot(df, x_var = "conc_num_factor", y_var = var, plot_type = "scatter", title = paste("Summary Plot for", var))
}) })
save_plots("Summary_Plots", plot_list, output_dir) save_plots("Summary_Plots", plot_list, output_dir)
} }
@@ -453,7 +456,7 @@ generate_summary_plots <- function(df, output_dir) {
generate_cpp_correlation_plots <- function(df_na_rm, lm_list, output_dir) { generate_cpp_correlation_plots <- function(df_na_rm, lm_list, output_dir) {
lm_summaries <- lapply(lm_list, summary) lm_summaries <- lapply(lm_list, summary)
plot_titles <- c("Interaction L vs. Interaction K", "Interaction L vs. Interaction r", "Interaction L vs. Interaction AUC", plot_titles <- c("Interaction L vs. Interaction K", "Interaction L vs. Interaction r", "Interaction L vs. Interaction AUC",
"Interaction K vs. Interaction r", "Interaction K vs. Interaction AUC", "Interaction r vs. Interaction AUC") "Interaction K vs. Interaction r", "Interaction K vs. Interaction AUC", "Interaction r vs. Interaction AUC")
plot_list <- lapply(seq_along(lm_list), function(i) { plot_list <- lapply(seq_along(lm_list), function(i) {
ggplot(df_na_rm, aes_string(x = names(lm_list)[i][1], y = names(lm_list)[i][2])) + ggplot(df_na_rm, aes_string(x = names(lm_list)[i][1], y = names(lm_list)[i][2])) +
@@ -463,7 +466,7 @@ generate_cpp_correlation_plots <- function(df_na_rm, lm_list, output_dir) {
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[i]]$r.squared, 3))) + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[i]]$r.squared, 3))) +
theme_Publication_legend_right() theme_Publication_legend_right()
}) })
save_plots("Correlation_CPPs", plot_list, output_dir) save_plots("Correlation_CPPs", plot_list, output_dir)
} }
@@ -488,7 +491,7 @@ adjust_missing_and_rank <- function(df) {
r_Rank_lm = rank(Z_lm_r), r_Rank_lm = rank(Z_lm_r),
AUC_Rank_lm = rank(Z_lm_AUC) AUC_Rank_lm = rank(Z_lm_AUC)
) )
return(df) return(df)
} }
@@ -556,7 +559,7 @@ main <- function() {
df <- update_gene_names(df, args$sgd_gene_list) df <- update_gene_names(df, args$sgd_gene_list)
max_conc <- max(df$conc_num_factor) max_conc <- max(df$conc_num_factor)
# QC # QC
# Filter the df above sd tolerance # Filter the df above sd tolerance
df_above_tolerance <- df %>% filter(DB == 1) df_above_tolerance <- df %>% filter(DB == 1)
@@ -570,31 +573,31 @@ main <- function() {
K = ifelse(DB == 1, NA, K) K = ifelse(DB == 1, NA, K)
) )
df_no_zeros <- df_na %>% filter(L > 0) df_no_zeros <- df_na %>% filter(L > 0)
# Generate QC PDFs and HTMLs # Generate QC PDFs and HTMLs
message("Generating QC plots") message("Generating QC plots")
variables <- c("L", "K", "r", "AUC", "delta_bg") variables <- c("L", "K", "r", "AUC", "delta_bg")
generate_and_save_plots(df, out_dir_qc, "Before_QC", variables, include_qc = TRUE) # generate_and_save_plots(df, out_dir_qc, "Before_QC", variables, include_qc = TRUE)
generate_and_save_plots(df_above_tolerance, out_dir_qc, "Raw_L_vs_K_above_delta_bg_threshold", variables, include_qc = TRUE) # generate_and_save_plots(df_above_tolerance, out_dir_qc, "Raw_L_vs_K_above_delta_bg_threshold", variables, include_qc = TRUE)
generate_and_save_plots(df_na, out_dir_qc, "After_QC", variables) # generate_and_save_plots(df_na, out_dir_qc, "After_QC", variables)
generate_and_save_plots(df_no_zeros, out_dir_qc, "No_Zeros", variables) # generate_and_save_plots(df_no_zeros, out_dir_qc, "No_Zeros", variables)
# Calculate summary statistics # Calculate summary statistics
message("Calculating summary statistics for all strains") message("Calculating summary statistics for all strains")
variables <- c("L", "K", "r", "AUC") variables <- c("L", "K", "r", "AUC")
summary_stats <- calculate_summary_stats(df_na, variables, group_vars = c("conc_num", "conc_num_factor")) summary_stats <- calculate_summary_stats(df_na, variables, group_vars = c("conc_num", "conc_num_factor"))
write.csv(summary_stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE) write.csv(summary_stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE)
# Calculate the pre-background stats once # Calculate the pre-background stats once
stats_by_l <- summary_stats %>% select(starts_with("L_"), "OrfRep", "conc_num", "conc_num_factor") stats_by_l <- summary_stats %>% select(starts_with("L_"), "OrfRep", "conc_num", "conc_num_factor")
stats_by_k <- summary_stats %>% select(starts_with("K_"), "OrfRep", "conc_num", "conc_num_factor") stats_by_k <- summary_stats %>% select(starts_with("K_"), "OrfRep", "conc_num", "conc_num_factor")
stats_by_r <- summary_stats %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor") stats_by_r <- summary_stats %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor")
stats_by_auc <- summary_stats %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor") stats_by_auc <- summary_stats %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor")
# Process background strains # Process background strains
background_strains <- c("YDL227C") background_strains <- c("YDL227C")
lapply(background_strains, function(strain) { lapply(background_strains, function(strain) {
message("Processing background strain: ", strain) message("Processing background strain: ", strain)
# Handle missing data by setting zero values to NA # Handle missing data by setting zero values to NA
@@ -608,7 +611,7 @@ main <- function() {
AUC = if_else(AUC == 0, NA, AUC) AUC = if_else(AUC == 0, NA, AUC)
) %>% ) %>%
filter(!is.na(L)) filter(!is.na(L))
# Recalculate summary statistics for the background strain # Recalculate summary statistics for the background strain
message("Calculating summary statistics for background strain") message("Calculating summary statistics for background strain")
stats_background <- calculate_summary_stats(df_background, variables, group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor")) stats_background <- calculate_summary_stats(df_background, variables, group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor"))
@@ -619,12 +622,12 @@ main <- function() {
write.csv(stats_background, write.csv(stats_background,
file = file.path(output_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")), file = file.path(output_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")),
row.names = FALSE) row.names = FALSE)
# Filter L values within and outside 2SD of K # Filter L values within and outside 2SD of K
results_2sd <- calculate_l_2sd_of_k(df_background, stats_by_k_background) results_2sd <- calculate_l_2sd_of_k(df_background, stats_by_k_background)
within_2sd_k <- results_2sd$within_2sd_k within_2sd_k <- results_2sd$within_2sd_k
outside_2sd_k <- results_2sd$outside_2sd_k outside_2sd_k <- results_2sd$outside_2sd_k
# Calculate summary statistics for L within and outside 2SD of K # Calculate summary statistics for L within and outside 2SD of K
message("Calculating summary statistics for for L within 2SD of K") message("Calculating summary statistics for for L within 2SD of K")
l_within_2sd_k <- calculate_summary_stats(within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor")) l_within_2sd_k <- calculate_summary_stats(within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
@@ -636,20 +639,20 @@ main <- function() {
write.csv(l_outside_2sd_k, write.csv(l_outside_2sd_k,
file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"), file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"),
row.names = FALSE) row.names = FALSE)
message("Generating Raw_L_vs_K_for_strains_outside_2sd_k plots") message("Generating Raw_L_vs_K_for_strains_outside_2sd_k plots")
generate_and_save_plots(outside_2sd_k, out_dir, "Raw_L_vs_K_for_strains_outside_2sd_k") generate_and_save_plots(outside_2sd_k, out_dir, "Raw_L_vs_K_for_strains_outside_2sd_k")
message("Calculating background means") message("Calculating background means")
background_means <- calculate_background_means(stats_by_l_background, background_means <- calculate_background_means(stats_by_l_background,
stats_by_k_background, stats_by_r_background, stats_by_auc_background) stats_by_k_background, stats_by_r_background, stats_by_auc_background)
# Filter reference and deletion strains # Filter reference and deletion strains
# Formerly X2_RF (reference strain) # Formerly X2_RF (reference strain)
df_reference <- df_na %>% df_reference <- df_na %>%
filter(OrfRep == strain) %>% filter(OrfRep == strain) %>%
mutate(SM = 0) mutate(SM = 0)
# Formerly X2 (deletion strains) # Formerly X2 (deletion strains)
df_deletion <- df_na %>% df_deletion <- df_na %>%
filter(OrfRep != strain) %>% filter(OrfRep != strain) %>%
@@ -659,7 +662,7 @@ main <- function() {
reference_strain <- process_strains(df_reference, l_within_2sd_k, strain, out_dir) reference_strain <- process_strains(df_reference, l_within_2sd_k, strain, out_dir)
message("Processing deletion strains") message("Processing deletion strains")
deletion_strains <- process_strains(df_deletion, l_within_2sd_k, strain, out_dir) deletion_strains <- process_strains(df_deletion, l_within_2sd_k, strain, out_dir)
# Deprecated # Deprecated
# Change OrfRep to include the reference strain, gene, and Num so each RF gets its own score # Change OrfRep to include the reference strain, gene, and Num so each RF gets its own score
# reference_strain <- reference_strain %>% # reference_strain <- reference_strain %>%
@@ -668,7 +671,7 @@ main <- function() {
# This is synonymous with the legacy OrfRep mutation # This is synonymous with the legacy OrfRep mutation
# Use group_by in functions in lieu of mutating OrfRep # Use group_by in functions in lieu of mutating OrfRep
# default_group_vars <- c("OrfRep", "Gene", "num") # default_group_vars <- c("OrfRep", "Gene", "num")
# Calculate interactions # Calculate interactions
variables <- c("L", "K", "r", "AUC") variables <- c("L", "K", "r", "AUC")
message("Calculating reference interaction scores") message("Calculating reference interaction scores")
@@ -677,58 +680,58 @@ main <- function() {
message("Calculating deletion interaction scores") message("Calculating deletion interaction scores")
deletion_results <- calculate_interaction_scores(deletion_strains, stats_by_l, deletion_results <- calculate_interaction_scores(deletion_strains, stats_by_l,
stats_by_k, stats_by_r, stats_by_auc, background_means, max_conc, variables) stats_by_k, stats_by_r, stats_by_auc, background_means, max_conc, variables)
zscores_calculations_reference <- reference_results$zscores_calculations zscores_calculations_reference <- reference_results$zscores_calculations
zscores_interactions_reference <- reference_results$zscores_interactions zscores_interactions_reference <- reference_results$zscores_interactions
zscores_calculations <- deletion_results$zscores_calculations zscores_calculations <- deletion_results$zscores_calculations
zscores_interactions <- deletion_results$zscores_interactions zscores_interactions <- deletion_results$zscores_interactions
# TODO: I don't know if we need this? # TODO: I don't know if we need this?
zscores_interactions <- zscores_interactions %>% zscores_interactions <- zscores_interactions %>%
arrange(desc(Z_lm_L)) %>% arrange(desc(Z_lm_L)) %>%
arrange(desc(NG)) arrange(desc(NG))
message("Writing zscores csv files") message("Writing zscores csv files")
write.csv(zscores_calculations_reference, file = file.path(out_dir, "RF_ZScores_Calculations.csv"), row.names = FALSE) write.csv(zscores_calculations_reference, file = file.path(out_dir, "RF_ZScores_Calculations.csv"), row.names = FALSE)
write.csv(zscores_calculations, file = file.path(out_dir, "ZScores_Calculations.csv"), row.names = FALSE) write.csv(zscores_calculations, file = file.path(out_dir, "ZScores_Calculations.csv"), row.names = FALSE)
write.csv(zscores_interactions_reference, file = file.path(out_dir, "RF_ZScores_Interaction.csv"), row.names = FALSE) write.csv(zscores_interactions_reference, file = file.path(out_dir, "RF_ZScores_Interaction.csv"), row.names = FALSE)
write.csv(zscores_interactions, file = file.path(out_dir, "ZScores_Interaction.csv"), row.names = FALSE) write.csv(zscores_interactions, file = file.path(out_dir, "ZScores_Interaction.csv"), row.names = FALSE)
# Define conditions for enhancers and suppressors # Define conditions for enhancers and suppressors
enhancer_condition_L <- zscores_interactions$Avg_Zscore_L >= 2 enhancer_condition_L <- zscores_interactions$Avg_Zscore_L >= 2
suppressor_condition_L <- zscores_interactions$Avg_Zscore_L <= -2 suppressor_condition_L <- zscores_interactions$Avg_Zscore_L <= -2
enhancer_condition_K <- zscores_interactions$Avg_Zscore_K >= 2 enhancer_condition_K <- zscores_interactions$Avg_Zscore_K >= 2
suppressor_condition_K <- zscores_interactions$Avg_Zscore_K <= -2 suppressor_condition_K <- zscores_interactions$Avg_Zscore_K <= -2
# Subset data # Subset data
enhancers_L <- zscores_interactions[enhancer_condition_L, ] enhancers_L <- zscores_interactions[enhancer_condition_L, ]
suppressors_L <- zscores_interactions[suppressor_condition_L, ] suppressors_L <- zscores_interactions[suppressor_condition_L, ]
enhancers_K <- zscores_interactions[enhancer_condition_K, ] enhancers_K <- zscores_interactions[enhancer_condition_K, ]
suppressors_K <- zscores_interactions[suppressor_condition_K, ] suppressors_K <- zscores_interactions[suppressor_condition_K, ]
# Save enhancers and suppressors # Save enhancers and suppressors
message("Writing enhancer/suppressor csv files") message("Writing enhancer/suppressor csv files")
write.csv(enhancers_L, file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_L.csv"), row.names = FALSE) write.csv(enhancers_L, file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_L.csv"), row.names = FALSE)
write.csv(suppressors_L, file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_L.csv"), row.names = FALSE) write.csv(suppressors_L, file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_L.csv"), row.names = FALSE)
write.csv(enhancers_K, file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_K.csv"), row.names = FALSE) write.csv(enhancers_K, file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_K.csv"), row.names = FALSE)
write.csv(suppressors_K, file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_K.csv"), row.names = FALSE) write.csv(suppressors_K, file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_K.csv"), row.names = FALSE)
# Combine conditions for enhancers and suppressors # Combine conditions for enhancers and suppressors
enhancers_and_suppressors_L <- zscores_interactions[enhancer_condition_L | suppressor_condition_L, ] enhancers_and_suppressors_L <- zscores_interactions[enhancer_condition_L | suppressor_condition_L, ]
enhancers_and_suppressors_K <- zscores_interactions[enhancer_condition_K | suppressor_condition_K, ] enhancers_and_suppressors_K <- zscores_interactions[enhancer_condition_K | suppressor_condition_K, ]
# Save combined enhancers and suppressors # Save combined enhancers and suppressors
write.csv(enhancers_and_suppressors_L, write.csv(enhancers_and_suppressors_L,
file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv"), row.names = FALSE) file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv"), row.names = FALSE)
write.csv(enhancers_and_suppressors_K, write.csv(enhancers_and_suppressors_K,
file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv"), row.names = FALSE) file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv"), row.names = FALSE)
# Handle linear model based enhancers and suppressors # Handle linear model based enhancers and suppressors
enhancers_lm_L <- zscores_interactions[zscores_interactions$Z_lm_L >= 2, ] enhancers_lm_L <- zscores_interactions[zscores_interactions$Z_lm_L >= 2, ]
suppressors_lm_L <- zscores_interactions[zscores_interactions$Z_lm_L <= -2, ] suppressors_lm_L <- zscores_interactions[zscores_interactions$Z_lm_L <= -2, ]
enhancers_lm_K <- zscores_interactions[zscores_interactions$Z_lm_K >= 2, ] enhancers_lm_K <- zscores_interactions[zscores_interactions$Z_lm_K >= 2, ]
suppressors_lm_K <- zscores_interactions[zscores_interactions$Z_lm_K <= -2, ] suppressors_lm_K <- zscores_interactions[zscores_interactions$Z_lm_K <= -2, ]
# Save linear model based enhancers and suppressors # Save linear model based enhancers and suppressors
message("Writing linear model enhancer/suppressor csv files") message("Writing linear model enhancer/suppressor csv files")
write.csv(enhancers_lm_L, write.csv(enhancers_lm_L,
@@ -739,15 +742,15 @@ main <- function() {
file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_K_lm.csv"), row.names = FALSE) file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_K_lm.csv"), row.names = FALSE)
write.csv(suppressors_lm_K, write.csv(suppressors_lm_K,
file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_K_lm.csv"), row.names = FALSE) file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_K_lm.csv"), row.names = FALSE)
# Generate plots for interaction scores # Generate plots for interaction scores
generate_rf_plots(zscores_calculations_reference, zscores_interactions_reference, out_dir) generate_rf_plots(zscores_calculations_reference, zscores_interactions_reference, out_dir)
generate_rf_plots(zscores_calculations, zscores_interactions, out_dir) generate_rf_plots(zscores_calculations, zscores_interactions, out_dir)
# Apply filtering to remove NA values before further analysis # Apply filtering to remove NA values before further analysis
zscores_interactions_filtered <- zscores_interactions %>% zscores_interactions_filtered <- zscores_interactions %>%
filter(!is.na(Z_lm_L) | !is.na(Avg_Zscore_L)) filter(!is.na(Z_lm_L) | !is.na(Avg_Zscore_L))
# Generate summary and correlation plots # Generate summary and correlation plots
generate_summary_plots(df, out_dir) generate_summary_plots(df, out_dir)
lm_list <- list( lm_list <- list(
@@ -758,10 +761,10 @@ main <- function() {
lm(Z_lm_AUC ~ Z_lm_K, data = zscores_interactions_filtered), lm(Z_lm_AUC ~ Z_lm_K, data = zscores_interactions_filtered),
lm(Z_lm_AUC ~ Z_lm_r, data = zscores_interactions_filtered) lm(Z_lm_AUC ~ Z_lm_r, data = zscores_interactions_filtered)
) )
# Generate cpp correlation plots # Generate cpp correlation plots
generate_cpp_correlation_plots(zscores_interactions_filtered, lm_list, out_dir) generate_cpp_correlation_plots(zscores_interactions_filtered, lm_list, out_dir)
# Generate ranked plots # Generate ranked plots
create_ranked_plots(zscores_interactions_filtered, out_dir) create_ranked_plots(zscores_interactions_filtered, out_dir)
}) })