Add more output to main() to aid debug
This commit is contained in:
@@ -564,7 +564,16 @@ main <- function() {
|
||||
)
|
||||
df_no_zeros <- df_na %>% filter(L > 0)
|
||||
|
||||
# Generate QC PDFs and HTMLs
|
||||
message("Generating QC plots")
|
||||
variables <- c("L", "K", "r", "AUC", "delta_bg")
|
||||
generate_and_save_plots(df, out_dir_qc, "Before_QC", variables, include_qc = TRUE)
|
||||
generate_and_save_plots(df_above_tolerance, out_dir_qc, "Raw_L_vs_K_above_delta_bg_threshold", variables, include_qc = TRUE)
|
||||
generate_and_save_plots(df_na, out_dir_qc, "After_QC", variables)
|
||||
generate_and_save_plots(df_no_zeros, out_dir_qc, "No_Zeros", variables)
|
||||
|
||||
# Calculate summary statistics
|
||||
message("Calculating summary statistics for all strains")
|
||||
variables <- c("L", "K", "r", "AUC")
|
||||
summary_stats <- calculate_summary_stats(df_na, variables, group_vars = c("conc_num", "conc_num_factor"))
|
||||
write.csv(summary_stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE)
|
||||
@@ -575,16 +584,11 @@ main <- function() {
|
||||
stats_by_r <- summary_stats %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||
stats_by_auc <- summary_stats %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||
|
||||
# Generate QC PDFs and HTMLs
|
||||
variables <- c("L", "K", "r", "AUC", "delta_bg")
|
||||
generate_and_save_plots(df, out_dir_qc, "Before_QC", variables, include_qc = TRUE)
|
||||
generate_and_save_plots(df_above_tolerance, out_dir_qc, "Raw_L_vs_K_above_delta_bg_threshold", variables, include_qc = TRUE)
|
||||
generate_and_save_plots(df_na, out_dir_qc, "After_QC", variables)
|
||||
generate_and_save_plots(df_no_zeros, out_dir_qc, "No_Zeros", variables)
|
||||
|
||||
# Process background strains
|
||||
background_strains <- c("YDL227C")
|
||||
lapply(background_strains, function(strain) {
|
||||
|
||||
message("Processing background strain: ", strain)
|
||||
|
||||
# Handle missing data by setting zero values to NA
|
||||
# and then removing any rows with NA in L col
|
||||
@@ -599,73 +603,72 @@ main <- function() {
|
||||
filter(!is.na(L))
|
||||
|
||||
# Recalculate summary statistics for the background strain
|
||||
message("Calculating summary statistics for background strain")
|
||||
stats_background <- calculate_summary_stats(df_background, variables, group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor"))
|
||||
stats_by_l_background <- stats_background %>% select(starts_with("L_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||
stats_by_k_background <- stats_background %>% select(starts_with("K_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||
stats_by_r_background <- stats_background %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||
stats_by_auc_background <- stats_background %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||
|
||||
# Backup in case previous block explodes
|
||||
# Combine all summary statistics into one dataframe
|
||||
# df_stats_background <- stats_l %>%
|
||||
# left_join(stats_k, by = c("OrfRep", "conc_num", "conc_num_factor")) %>%
|
||||
# left_join(stats_r, by = c("OrfRep", "conc_num", "conc_num_factor")) %>%
|
||||
# left_join(stats_auc, by = c("OrfRep", "conc_num", "conc_num_factor"))
|
||||
|
||||
# Save the summary statistics for the background strain
|
||||
write.csv(stats_background,
|
||||
file = file.path(output_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")),
|
||||
row.names = FALSE)
|
||||
|
||||
# Calculate L values within and outside 2SD of K
|
||||
|
||||
# Filter L values within and outside 2SD of K
|
||||
results_2sd <- calculate_l_2sd_of_k(df_background, stats_by_k_background)
|
||||
within_2sd_k <- results_2sd$within_2sd_k
|
||||
outside_2sd_k <- results_2sd$outside_2sd_k
|
||||
|
||||
# Calculate summary statistics for L within and outside 2SD of K
|
||||
message("Calculating summary statistics for for L within 2SD of K")
|
||||
l_within_2sd_k <- calculate_summary_stats(within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
|
||||
write.csv(l_within_2sd_k,
|
||||
file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"),
|
||||
row.names = FALSE)
|
||||
|
||||
message("Calculating summary statistics for for L outside 2SD of K")
|
||||
l_outside_2sd_k <- calculate_summary_stats(outside_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
|
||||
write.csv(l_outside_2sd_k,
|
||||
file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"),
|
||||
row.names = FALSE)
|
||||
|
||||
message("Generating Raw_L_vs_K_for_strains_outside_2sd_k plots")
|
||||
generate_and_save_plots(outside_2sd_k, out_dir, "Raw_L_vs_K_for_strains_outside_2sd_k")
|
||||
|
||||
message("Calculating background means")
|
||||
background_means <- calculate_background_means(stats_by_l_background,
|
||||
stats_by_k_background, stats_by_r_background, stats_by_auc_background)
|
||||
|
||||
# Formerly X2_RF
|
||||
# Filter reference and deletion strains
|
||||
# Formerly X2_RF (reference strain)
|
||||
df_reference <- df_na %>%
|
||||
filter(OrfRep == strain) %>%
|
||||
mutate(SM = 0)
|
||||
|
||||
# Formerly X2
|
||||
# Formerly X2 (deletion strains)
|
||||
df_deletion <- df_na %>%
|
||||
filter(OrfRep != strain) %>%
|
||||
mutate(SM = 0)
|
||||
|
||||
df_reference_strains <- process_strains(df_reference, l_within_2sd_k, strain, out_dir)
|
||||
df_deletion_strains <- process_strains(df_deletion, l_within_2sd_k, strain, out_dir)
|
||||
|
||||
variables <- c("L", "K", "r", "AUC")
|
||||
message("Processing reference strain")
|
||||
reference_strain <- process_strains(df_reference, l_within_2sd_k, strain, out_dir)
|
||||
message("Processing deletion strains")
|
||||
deletion_strains <- process_strains(df_deletion, l_within_2sd_k, strain, out_dir)
|
||||
|
||||
# Deprecated
|
||||
# Change OrfRep to include the reference strain, gene, and Num so each RF gets its own score
|
||||
# df_reference_strains <- df_reference_strains %>%
|
||||
# reference_strain <- reference_strain %>%
|
||||
# mutate(OrfRep = paste(OrfRep, Gene, num, sep = "_"))
|
||||
# We are leaving OrfRep unchanged and using group_by(OrfRep, Gene, num) by default
|
||||
# This is synonymous with the legacy OrfRep mutation
|
||||
# Use group_by in functions in lieu of mutating OrfRep
|
||||
# default_group_vars <- c("OrfRep", "Gene", "num")
|
||||
|
||||
reference_results <- calculate_interaction_scores(df_reference_strains, stats_by_l,
|
||||
stats_by_k, stats_by_r, stats_by_auc, background_means, max_conc, variables)
|
||||
|
||||
deletion_results <- calculate_interaction_scores(df_deletion_strains, stats_by_l,
|
||||
# Calculate interactions
|
||||
variables <- c("L", "K", "r", "AUC")
|
||||
message("Calculating reference interaction scores")
|
||||
reference_results <- calculate_interaction_scores(reference_strain, stats_by_l,
|
||||
stats_by_k, stats_by_r, stats_by_auc, background_means, max_conc, variables)
|
||||
message("Calculating deletion interaction scores")
|
||||
deletion_results <- calculate_interaction_scores(deletion_strains, stats_by_l,
|
||||
stats_by_k, stats_by_r, stats_by_auc, background_means, max_conc, variables)
|
||||
|
||||
zscores_calculations_reference <- reference_results$zscores_calculations
|
||||
@@ -673,12 +676,12 @@ main <- function() {
|
||||
zscores_calculations <- deletion_results$zscores_calculations
|
||||
zscores_interactions <- deletion_results$zscores_interactions
|
||||
|
||||
|
||||
# TODO: I don't know if we need this?
|
||||
zscores_interactions <- zscores_interactions %>%
|
||||
arrange(desc(Z_lm_L)) %>%
|
||||
arrange(desc(NG))
|
||||
|
||||
message("Writing zscores csv files")
|
||||
write.csv(zscores_calculations_reference, file = file.path(out_dir, "RF_ZScores_Calculations.csv"), row.names = FALSE)
|
||||
write.csv(zscores_calculations, file = file.path(out_dir, "ZScores_Calculations.csv"), row.names = FALSE)
|
||||
write.csv(zscores_interactions_reference, file = file.path(out_dir, "RF_ZScores_Interaction.csv"), row.names = FALSE)
|
||||
@@ -697,6 +700,7 @@ main <- function() {
|
||||
suppressors_K <- zscores_interactions[suppressor_condition_K, ]
|
||||
|
||||
# Save enhancers and suppressors
|
||||
message("Writing enhancer/suppressor csv files")
|
||||
write.csv(enhancers_L, file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_L.csv"), row.names = FALSE)
|
||||
write.csv(suppressors_L, file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_L.csv"), row.names = FALSE)
|
||||
write.csv(enhancers_K, file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_K.csv"), row.names = FALSE)
|
||||
@@ -719,6 +723,7 @@ main <- function() {
|
||||
suppressors_lm_K <- zscores_interactions[zscores_interactions$Z_lm_K <= -2, ]
|
||||
|
||||
# Save linear model based enhancers and suppressors
|
||||
message("Writing linear model enhancer/suppressor csv files")
|
||||
write.csv(enhancers_lm_L,
|
||||
file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_L_lm.csv"), row.names = FALSE)
|
||||
write.csv(suppressors_lm_L,
|
||||
|
||||
Reference in New Issue
Block a user