Remove deprecated code and re-enable QC plots

This commit is contained in:
2024-09-01 22:54:54 -04:00
parent 65b273ffa1
commit 66062438b5

View File

@@ -556,13 +556,13 @@ main <- function() {
df_na %>% filter(if_all(c(L), is.finite)) # Add L, r, AUC, K as needed for debugging
}
# # Generate QC PDFs and HTMLs
# message("Generating QC plots")
# variables <- c("L", "K", "r", "AUC", "delta_bg")
# generate_and_save_plots(df, out_dir_qc, "Before_QC", variables, include_qc = TRUE)
# generate_and_save_plots(df_above_tolerance, out_dir_qc, "Raw_L_vs_K_above_delta_bg_threshold", variables, include_qc = TRUE)
# generate_and_save_plots(df_na_filtered, out_dir_qc, "After_QC", variables)
# generate_and_save_plots(df_no_zeros, out_dir_qc, "No_Zeros", variables)
# Generate QC PDFs and HTMLs
message("Generating QC plots")
variables <- c("L", "K", "r", "AUC", "delta_bg")
generate_and_save_plots(df, out_dir_qc, "Before_QC", variables, include_qc = TRUE)
generate_and_save_plots(df_above_tolerance, out_dir_qc, "Raw_L_vs_K_above_delta_bg_threshold", variables, include_qc = TRUE)
generate_and_save_plots(df_na_filtered, out_dir_qc, "After_QC", variables)
generate_and_save_plots(df_no_zeros, out_dir_qc, "No_Zeros", variables)
rm(df, df_above_tolerance, df_no_zeros)
@@ -573,15 +573,7 @@ main <- function() {
write.csv(stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE)
stats_joined <- left_join(df_na, stats, by = c("conc_num", "conc_num_factor"))
# Create separate dataframes for each variable (we'll use later for plotting)
# stats_by_l <- stats_joined %>% select(starts_with("L_"), "OrfRep", "conc_num", "conc_num_factor")
# stats_by_k <- stats_joined %>% select(starts_with("K_"), "OrfRep", "conc_num", "conc_num_factor")
# stats_by_r <- stats_joined %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor")
# stats_by_auc <- stats_joined %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor")
# Originally this step filtered out rows where L was NA.
# That filtering has been removed for now because it did not seem right, but it may need to be added back in later.
# str(stats_by_k)
# Filter data within 2SD
within_2sd_k <- stats_joined %>%
@@ -624,10 +616,6 @@ main <- function() {
# Recalculate summary statistics for the background strain
message("Calculating summary statistics for background strain")
stats_bg <- calculate_summary_stats(df_bg, variables, group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor"))
# stats_by_l_bg <- stats_bg %>% select(starts_with("L_"), "OrfRep", "Gene", "conc_num", "conc_num_factor")
# stats_by_k_bg <- stats_bg %>% select(starts_with("K_"), "OrfRep", "Gene", "conc_num", "conc_num_factor")
# stats_by_r_bg <- stats_bg %>% select(starts_with("r_"), "OrfRep", "Gene", "conc_num", "conc_num_factor")
# stats_by_auc_bg <- stats_bg %>% select(starts_with("AUC_"), "OrfRep", "Gene", "conc_num", "conc_num_factor")
write.csv(stats_bg,
file = file.path(out_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")),
row.names = FALSE)
@@ -649,15 +637,6 @@ main <- function() {
message("Processing deletion strains")
deletion_strains <- process_strains(df_deletion, l_within_2sd_k, strain)
# Deprecated
# Change OrfRep to include the reference strain, gene, and num so each RF gets its own score
# reference_strain <- reference_strain %>%
# mutate(OrfRep = paste(OrfRep, Gene, num, sep = "_"))
# We are leaving OrfRep unchanged and using group_by(OrfRep, Gene, num) by default
# This is synonymous with the legacy OrfRep mutation
# Use group_by in functions in lieu of mutating OrfRep
# default_group_vars <- c("OrfRep", "Gene", "num")
# TODO we may need to add "num" to grouping vars
# Calculate interactions