Add more informative output

This commit is contained in:
2024-09-01 17:33:40 -04:00
parent c6c4870d46
commit e1f57ff7d7

View File

@@ -204,10 +204,10 @@ generate_and_save_plots <- function(df, output_dir, prefix, variables, include_q
for (var in variables) { for (var in variables) {
scatter_plot <- scatter_plot <-
generate_plot(df, x_var = "scan", y_var = var, plot_type = "scatter", generate_plot(df, x_var = "scan", y_var = var, plot_type = "scatter",
title = paste(prefix, "Scatter Plot for", var)) title = paste(prefix, "Scatter Plot for", var))
boxplot <- boxplot <-
generate_plot(df, x_var = "scan", y_var = var, plot_type = "box", generate_plot(df, x_var = "scan", y_var = var, plot_type = "box",
title = paste(prefix, "Box Plot for", var)) title = paste(prefix, "Box Plot for", var))
plots[[paste0(var, "_scatter")]] <- scatter_plot plots[[paste0(var, "_scatter")]] <- scatter_plot
plots[[paste0(var, "_box")]] <- boxplot plots[[paste0(var, "_box")]] <- boxplot
@@ -216,13 +216,13 @@ generate_and_save_plots <- function(df, output_dir, prefix, variables, include_q
if (include_qc) { if (include_qc) {
plots[["Raw_L_vs_K"]] <- plots[["Raw_L_vs_K"]] <-
generate_plot(df, x_var = "L", y_var = "K", plot_type = "scatter", generate_plot(df, x_var = "L", y_var = "K", plot_type = "scatter",
title = "Raw L vs K before QC") title = "Raw L vs K before QC")
plots[["Delta_bg_Density"]] <- plots[["Delta_bg_Density"]] <-
generate_plot(df, x_var = "delta_bg", plot_type = "density", color_var = "conc_num", generate_plot(df, x_var = "delta_bg", plot_type = "density", color_var = "conc_num",
title = "Density plot for Delta Background by Conc All Data") title = "Density plot for Delta Background by Conc All Data")
plots[["Delta_bg_Bar"]] <- plots[["Delta_bg_Bar"]] <-
generate_plot(df, x_var = "delta_bg", plot_type = "bar", generate_plot(df, x_var = "delta_bg", plot_type = "bar",
title = "Bar plot for Delta Background by Conc All Data") title = "Bar plot for Delta Background by Conc All Data")
} }
save_plots(prefix, plots, output_dir) save_plots(prefix, plots, output_dir)
@@ -233,6 +233,10 @@ calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "c
summary_stats <- df %>% summary_stats <- df %>%
group_by(across(all_of(group_vars))) %>% group_by(across(all_of(group_vars))) %>%
summarise(across(all_of(variables), list( summarise(across(all_of(variables), list(
N = ~{
message("Calculating summary statistics for ", cur_column())
n()
},
mean = ~mean(.x, na.rm = TRUE), mean = ~mean(.x, na.rm = TRUE),
median = ~median(.x, na.rm = TRUE), median = ~median(.x, na.rm = TRUE),
max = ~max(.x, na.rm = TRUE), max = ~max(.x, na.rm = TRUE),
@@ -284,11 +288,10 @@ save_plots <- function(file_name, plot_list, output_dir) {
message("Error in plot: ", plot_name, "\n", e) message("Error in plot: ", plot_name, "\n", e)
return(NULL) return(NULL)
}) })
if (!is.null(pgg)) { if (!is.null(pgg)) {
saveWidget(pgg, saveWidget(pgg,
file = file.path(output_dir, file = file.path(output_dir,
paste0(file_name, "_", plot_name, ".html")), paste0(file_name, "_", plot_name, ".html")),
selfcontained = TRUE) selfcontained = TRUE)
} }
}) })
@@ -329,8 +332,8 @@ process_strains <- function(df, l_within_2sd_k, strain, output_dir) {
return(df_strains) return(df_strains)
} }
calculate_interaction_scores <- function(df, df_stats_by_l, df_stats_by_k, calculate_interaction_scores <- function(df, df_stats_by_l, df_stats_by_k, df_stats_by_r, df_stats_by_auc,
df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) { background_means, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) {
# Calculate all necessary statistics and shifts in one step # Calculate all necessary statistics and shifts in one step
interaction_scores_all <- df %>% interaction_scores_all <- df %>%
@@ -453,7 +456,7 @@ generate_summary_plots <- function(df, output_dir) {
generate_cpp_correlation_plots <- function(df_na_rm, lm_list, output_dir) { generate_cpp_correlation_plots <- function(df_na_rm, lm_list, output_dir) {
lm_summaries <- lapply(lm_list, summary) lm_summaries <- lapply(lm_list, summary)
plot_titles <- c("Interaction L vs. Interaction K", "Interaction L vs. Interaction r", "Interaction L vs. Interaction AUC", plot_titles <- c("Interaction L vs. Interaction K", "Interaction L vs. Interaction r", "Interaction L vs. Interaction AUC",
"Interaction K vs. Interaction r", "Interaction K vs. Interaction AUC", "Interaction r vs. Interaction AUC") "Interaction K vs. Interaction r", "Interaction K vs. Interaction AUC", "Interaction r vs. Interaction AUC")
plot_list <- lapply(seq_along(lm_list), function(i) { plot_list <- lapply(seq_along(lm_list), function(i) {
ggplot(df_na_rm, aes_string(x = names(lm_list)[i][1], y = names(lm_list)[i][2])) + ggplot(df_na_rm, aes_string(x = names(lm_list)[i][1], y = names(lm_list)[i][2])) +
@@ -574,10 +577,10 @@ main <- function() {
# Generate QC PDFs and HTMLs # Generate QC PDFs and HTMLs
message("Generating QC plots") message("Generating QC plots")
variables <- c("L", "K", "r", "AUC", "delta_bg") variables <- c("L", "K", "r", "AUC", "delta_bg")
generate_and_save_plots(df, out_dir_qc, "Before_QC", variables, include_qc = TRUE) # generate_and_save_plots(df, out_dir_qc, "Before_QC", variables, include_qc = TRUE)
generate_and_save_plots(df_above_tolerance, out_dir_qc, "Raw_L_vs_K_above_delta_bg_threshold", variables, include_qc = TRUE) # generate_and_save_plots(df_above_tolerance, out_dir_qc, "Raw_L_vs_K_above_delta_bg_threshold", variables, include_qc = TRUE)
generate_and_save_plots(df_na, out_dir_qc, "After_QC", variables) # generate_and_save_plots(df_na, out_dir_qc, "After_QC", variables)
generate_and_save_plots(df_no_zeros, out_dir_qc, "No_Zeros", variables) # generate_and_save_plots(df_no_zeros, out_dir_qc, "No_Zeros", variables)
# Calculate summary statistics # Calculate summary statistics
message("Calculating summary statistics for all strains") message("Calculating summary statistics for all strains")