Add more output to main() to aid debug

This commit is contained in:
2024-09-01 16:44:57 -04:00
parent 403a7490eb
commit 6b1428a384

View File

@@ -564,7 +564,16 @@ main <- function() {
) )
df_no_zeros <- df_na %>% filter(L > 0) df_no_zeros <- df_na %>% filter(L > 0)
# Generate QC PDFs and HTMLs
message("Generating QC plots")
variables <- c("L", "K", "r", "AUC", "delta_bg")
generate_and_save_plots(df, out_dir_qc, "Before_QC", variables, include_qc = TRUE)
generate_and_save_plots(df_above_tolerance, out_dir_qc, "Raw_L_vs_K_above_delta_bg_threshold", variables, include_qc = TRUE)
generate_and_save_plots(df_na, out_dir_qc, "After_QC", variables)
generate_and_save_plots(df_no_zeros, out_dir_qc, "No_Zeros", variables)
# Calculate summary statistics # Calculate summary statistics
message("Calculating summary statistics for all strains")
variables <- c("L", "K", "r", "AUC") variables <- c("L", "K", "r", "AUC")
summary_stats <- calculate_summary_stats(df_na, variables, group_vars = c("conc_num", "conc_num_factor")) summary_stats <- calculate_summary_stats(df_na, variables, group_vars = c("conc_num", "conc_num_factor"))
write.csv(summary_stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE) write.csv(summary_stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE)
@@ -575,17 +584,12 @@ main <- function() {
stats_by_r <- summary_stats %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor") stats_by_r <- summary_stats %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor")
stats_by_auc <- summary_stats %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor") stats_by_auc <- summary_stats %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor")
# Generate QC PDFs and HTMLs
variables <- c("L", "K", "r", "AUC", "delta_bg")
generate_and_save_plots(df, out_dir_qc, "Before_QC", variables, include_qc = TRUE)
generate_and_save_plots(df_above_tolerance, out_dir_qc, "Raw_L_vs_K_above_delta_bg_threshold", variables, include_qc = TRUE)
generate_and_save_plots(df_na, out_dir_qc, "After_QC", variables)
generate_and_save_plots(df_no_zeros, out_dir_qc, "No_Zeros", variables)
# Process background strains # Process background strains
background_strains <- c("YDL227C") background_strains <- c("YDL227C")
lapply(background_strains, function(strain) { lapply(background_strains, function(strain) {
message("Processing background strain: ", strain)
# Handle missing data by setting zero values to NA # Handle missing data by setting zero values to NA
# and then removing any rows with NA in L col # and then removing any rows with NA in L col
df_background <- df_na %>% df_background <- df_na %>%
@@ -599,73 +603,72 @@ main <- function() {
filter(!is.na(L)) filter(!is.na(L))
# Recalculate summary statistics for the background strain # Recalculate summary statistics for the background strain
message("Calculating summary statistics for background strain")
stats_background <- calculate_summary_stats(df_background, variables, group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor")) stats_background <- calculate_summary_stats(df_background, variables, group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor"))
stats_by_l_background <- stats_background %>% select(starts_with("L_"), "OrfRep", "conc_num", "conc_num_factor") stats_by_l_background <- stats_background %>% select(starts_with("L_"), "OrfRep", "conc_num", "conc_num_factor")
stats_by_k_background <- stats_background %>% select(starts_with("K_"), "OrfRep", "conc_num", "conc_num_factor") stats_by_k_background <- stats_background %>% select(starts_with("K_"), "OrfRep", "conc_num", "conc_num_factor")
stats_by_r_background <- stats_background %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor") stats_by_r_background <- stats_background %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor")
stats_by_auc_background <- stats_background %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor") stats_by_auc_background <- stats_background %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor")
# Backup in case previous block explodes
# Combine all summary statistics into one dataframe
# df_stats_background <- stats_l %>%
# left_join(stats_k, by = c("OrfRep", "conc_num", "conc_num_factor")) %>%
# left_join(stats_r, by = c("OrfRep", "conc_num", "conc_num_factor")) %>%
# left_join(stats_auc, by = c("OrfRep", "conc_num", "conc_num_factor"))
# Save the summary statistics for the background strain
write.csv(stats_background, write.csv(stats_background,
file = file.path(output_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")), file = file.path(output_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")),
row.names = FALSE) row.names = FALSE)
# Calculate L values within and outside 2SD of K # Filter L values within and outside 2SD of K
results_2sd <- calculate_l_2sd_of_k(df_background, stats_by_k_background) results_2sd <- calculate_l_2sd_of_k(df_background, stats_by_k_background)
within_2sd_k <- results_2sd$within_2sd_k within_2sd_k <- results_2sd$within_2sd_k
outside_2sd_k <- results_2sd$outside_2sd_k outside_2sd_k <- results_2sd$outside_2sd_k
# Calculate summary statistics for L within and outside 2SD of K # Calculate summary statistics for L within and outside 2SD of K
message("Calculating summary statistics for for L within 2SD of K")
l_within_2sd_k <- calculate_summary_stats(within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor")) l_within_2sd_k <- calculate_summary_stats(within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
write.csv(l_within_2sd_k, write.csv(l_within_2sd_k,
file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"), file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"),
row.names = FALSE) row.names = FALSE)
message("Calculating summary statistics for for L outside 2SD of K")
l_outside_2sd_k <- calculate_summary_stats(outside_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor")) l_outside_2sd_k <- calculate_summary_stats(outside_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
write.csv(l_outside_2sd_k, write.csv(l_outside_2sd_k,
file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"), file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"),
row.names = FALSE) row.names = FALSE)
message("Generating Raw_L_vs_K_for_strains_outside_2sd_k plots")
generate_and_save_plots(outside_2sd_k, out_dir, "Raw_L_vs_K_for_strains_outside_2sd_k") generate_and_save_plots(outside_2sd_k, out_dir, "Raw_L_vs_K_for_strains_outside_2sd_k")
message("Calculating background means")
background_means <- calculate_background_means(stats_by_l_background, background_means <- calculate_background_means(stats_by_l_background,
stats_by_k_background, stats_by_r_background, stats_by_auc_background) stats_by_k_background, stats_by_r_background, stats_by_auc_background)
# Formerly X2_RF # Filter reference and deletion strains
# Formerly X2_RF (reference strain)
df_reference <- df_na %>% df_reference <- df_na %>%
filter(OrfRep == strain) %>% filter(OrfRep == strain) %>%
mutate(SM = 0) mutate(SM = 0)
# Formerly X2 # Formerly X2 (deletion strains)
df_deletion <- df_na %>% df_deletion <- df_na %>%
filter(OrfRep != strain) %>% filter(OrfRep != strain) %>%
mutate(SM = 0) mutate(SM = 0)
df_reference_strains <- process_strains(df_reference, l_within_2sd_k, strain, out_dir) message("Processing reference strain")
df_deletion_strains <- process_strains(df_deletion, l_within_2sd_k, strain, out_dir) reference_strain <- process_strains(df_reference, l_within_2sd_k, strain, out_dir)
message("Processing deletion strains")
variables <- c("L", "K", "r", "AUC") deletion_strains <- process_strains(df_deletion, l_within_2sd_k, strain, out_dir)
# Deprecated # Deprecated
# Change OrfRep to include the reference strain, gene, and Num so each RF gets its own score # Change OrfRep to include the reference strain, gene, and Num so each RF gets its own score
# df_reference_strains <- df_reference_strains %>% # reference_strain <- reference_strain %>%
# mutate(OrfRep = paste(OrfRep, Gene, num, sep = "_")) # mutate(OrfRep = paste(OrfRep, Gene, num, sep = "_"))
# We are leaving OrfRep unchanged and using group_by(OrfRep, Gene, num) by default # We are leaving OrfRep unchanged and using group_by(OrfRep, Gene, num) by default
# This is synonymous with the legacy OrfRep mutation # This is synonymous with the legacy OrfRep mutation
# Use group_by in functions in lieu of mutating OrfRep # Use group_by in functions in lieu of mutating OrfRep
# default_group_vars <- c("OrfRep", "Gene", "num") # default_group_vars <- c("OrfRep", "Gene", "num")
reference_results <- calculate_interaction_scores(df_reference_strains, stats_by_l, # Calculate interactions
variables <- c("L", "K", "r", "AUC")
message("Calculating reference interaction scores")
reference_results <- calculate_interaction_scores(reference_strain, stats_by_l,
stats_by_k, stats_by_r, stats_by_auc, background_means, max_conc, variables) stats_by_k, stats_by_r, stats_by_auc, background_means, max_conc, variables)
message("Calculating deletion interaction scores")
deletion_results <- calculate_interaction_scores(df_deletion_strains, stats_by_l, deletion_results <- calculate_interaction_scores(deletion_strains, stats_by_l,
stats_by_k, stats_by_r, stats_by_auc, background_means, max_conc, variables) stats_by_k, stats_by_r, stats_by_auc, background_means, max_conc, variables)
zscores_calculations_reference <- reference_results$zscores_calculations zscores_calculations_reference <- reference_results$zscores_calculations
@@ -673,12 +676,12 @@ main <- function() {
zscores_calculations <- deletion_results$zscores_calculations zscores_calculations <- deletion_results$zscores_calculations
zscores_interactions <- deletion_results$zscores_interactions zscores_interactions <- deletion_results$zscores_interactions
# TODO: I don't know if we need this? # TODO: I don't know if we need this?
zscores_interactions <- zscores_interactions %>% zscores_interactions <- zscores_interactions %>%
arrange(desc(Z_lm_L)) %>% arrange(desc(Z_lm_L)) %>%
arrange(desc(NG)) arrange(desc(NG))
message("Writing zscores csv files")
write.csv(zscores_calculations_reference, file = file.path(out_dir, "RF_ZScores_Calculations.csv"), row.names = FALSE) write.csv(zscores_calculations_reference, file = file.path(out_dir, "RF_ZScores_Calculations.csv"), row.names = FALSE)
write.csv(zscores_calculations, file = file.path(out_dir, "ZScores_Calculations.csv"), row.names = FALSE) write.csv(zscores_calculations, file = file.path(out_dir, "ZScores_Calculations.csv"), row.names = FALSE)
write.csv(zscores_interactions_reference, file = file.path(out_dir, "RF_ZScores_Interaction.csv"), row.names = FALSE) write.csv(zscores_interactions_reference, file = file.path(out_dir, "RF_ZScores_Interaction.csv"), row.names = FALSE)
@@ -697,6 +700,7 @@ main <- function() {
suppressors_K <- zscores_interactions[suppressor_condition_K, ] suppressors_K <- zscores_interactions[suppressor_condition_K, ]
# Save enhancers and suppressors # Save enhancers and suppressors
message("Writing enhancer/suppressor csv files")
write.csv(enhancers_L, file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_L.csv"), row.names = FALSE) write.csv(enhancers_L, file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_L.csv"), row.names = FALSE)
write.csv(suppressors_L, file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_L.csv"), row.names = FALSE) write.csv(suppressors_L, file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_L.csv"), row.names = FALSE)
write.csv(enhancers_K, file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_K.csv"), row.names = FALSE) write.csv(enhancers_K, file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_K.csv"), row.names = FALSE)
@@ -719,6 +723,7 @@ main <- function() {
suppressors_lm_K <- zscores_interactions[zscores_interactions$Z_lm_K <= -2, ] suppressors_lm_K <- zscores_interactions[zscores_interactions$Z_lm_K <= -2, ]
# Save linear model based enhancers and suppressors # Save linear model based enhancers and suppressors
message("Writing linear model enhancer/suppressor csv files")
write.csv(enhancers_lm_L, write.csv(enhancers_lm_L,
file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_L_lm.csv"), row.names = FALSE) file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_L_lm.csv"), row.names = FALSE)
write.csv(suppressors_lm_L, write.csv(suppressors_lm_L,