Explorar o código

Regroup calculations

Bryan Roessler hai 6 meses
pai
achega
ddcc26050f
Modificáronse 1 ficheiros con 33 adicións e 38 borrados
  1. 33 38
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R

+ 33 - 38
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -210,7 +210,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
 
   # Calculate WT statistics from df_bg
   wt_stats <- df_bg %>%
-    group_by(across(all_of(group_vars)), conc_num, conc_num_factor_factor) %>%
+    group_by(OrfRep, Gene, num, Drug, conc_num, conc_num_factor, conc_num_factor_factor) %>%
     summarise(
       WT_L = mean(mean_L, na.rm = TRUE),
       WT_sd_L = mean(sd_L, na.rm = TRUE),
@@ -223,12 +223,8 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
       .groups = "drop"
     )
 
-  # Join WT stats back to df
-  df <- df %>%
-    left_join(wt_stats, by = c(group_vars, "conc_num", "conc_num_factor_factor"))
-
   # Compute mean values at zero concentration
-  mean_L_zero_df <- df %>%
+  mean_zeroes <- df %>%
     filter(conc_num == 0) %>%
     group_by(across(all_of(group_vars))) %>%
     summarise(
@@ -239,10 +235,11 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
       .groups = "drop"
     )
 
-  # Join mean_L_zero_df to df
+  # Join WT statistics to df
   df <- df %>%
-    left_join(mean_L_zero_df, by = group_vars)
-
+    left_join(wt_stats, by = c(group_vars, "conc_num", "conc_num_factor", "conc_num_factor_factor")) %>%
+    left_join(mean_zeroes, by = c(group_vars))
+    
   # Calculate Raw Shifts and Z Shifts
   df <- df %>%
     mutate(
@@ -257,7 +254,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
     )
   
   calculations <- df %>%
-    group_by(across(all_of(group_vars))) %>%
+    group_by(across(all_of(c(group_vars, "conc_num", "conc_num_factor", "conc_num_factor_factor")))) %>%
     mutate(
       NG_sum = sum(NG, na.rm = TRUE),
       DB_sum = sum(DB, na.rm = TRUE),
@@ -289,6 +286,8 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
       Zscore_r = Delta_r / WT_sd_r,
       Zscore_AUC = Delta_AUC / WT_sd_AUC
     ) %>%
+    ungroup() %>%  # Ungroup before group_modify
+    group_by(across(all_of(group_vars))) %>%
     group_modify(~ {
       # Perform linear models only if there are enough unique conc_num_factor levels
       if (length(unique(.x$conc_num_factor)) > 1) {
@@ -354,8 +353,8 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
     left_join(delta_means_sds, by = group_vars)
 
   # Summary statistics for lm scores
-  lm_means_sds <- calculations %>%
-    summarise(
+  calculations <- calculations %>%
+    mutate(
       lm_mean_L = mean(lm_Score_L, na.rm = TRUE),
       lm_sd_L = sd(lm_Score_L, na.rm = TRUE),
       lm_mean_K = mean(lm_Score_K, na.rm = TRUE),
@@ -363,25 +362,9 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
       lm_mean_r = mean(lm_Score_r, na.rm = TRUE),
       lm_sd_r = sd(lm_Score_r, na.rm = TRUE),
       lm_mean_AUC = mean(lm_Score_AUC, na.rm = TRUE),
-      lm_sd_AUC = sd(lm_Score_AUC, na.rm = TRUE),
-      .groups = "drop"
-    )
-
-  # Add lm score means and standard deviations to calculations
-  calculations <- calculations %>%
-    mutate(
-      lm_mean_L = lm_means_sds$lm_mean_L,
-      lm_sd_L = lm_means_sds$lm_sd_L,
-      lm_mean_K = lm_means_sds$lm_mean_K,
-      lm_sd_K = lm_means_sds$lm_sd_K,
-      lm_mean_r = lm_means_sds$lm_mean_r,
-      lm_sd_r = lm_means_sds$lm_sd_r,
-      lm_mean_AUC = lm_means_sds$lm_mean_AUC,
-      lm_sd_AUC = lm_means_sds$lm_sd_AUC
-    )
-  
-  # Calculate Z-lm scores
-  calculations <- calculations %>%
+      lm_sd_AUC = sd(lm_Score_AUC, na.rm = TRUE)
+    ) %>%
+    # Calculate Z-lm scores
     mutate(
       Z_lm_L = (lm_Score_L - lm_mean_L) / lm_sd_L,
       Z_lm_K = (lm_Score_K - lm_mean_K) / lm_sd_K,
@@ -534,7 +517,7 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
       if (!is.null(grid_layout$ncol) && is.null(grid_layout$nrow)) {
         num_plots <- length(plots)
         nrow <- ceiling(num_plots / grid_layout$ncol)
-        message("No nrow provided, automatically using nrow = ", nrow)
+        # message("No nrow provided, automatically using nrow = ", nrow)
         grid_layout$nrow <- nrow
       }
 
@@ -563,11 +546,11 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
         
         # Print rows being filtered out
         if (nrow(out_of_bounds_df) > 0) {
+          message("Filtered: ", config$title, "using y-limits: [", config$ylim_vals[1], ", ", config$ylim_vals[2], "]")
           message("# of filtered rows outside y-limits (for plotting): ", nrow(out_of_bounds_df))
           # print(out_of_bounds_df)
         }
 
-        # Filter the valid data for plotting
         df <- df %>%
           filter(
             !is.na(.data[[config$y_var]]) &
@@ -599,6 +582,15 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
         }
       }
 
+      # Create a null plot with a "No data" message if no rows remain
+      # if (nrow(df) == 0) {
+      #   plot <- ggplot() +
+      #     geom_text(aes(0.5, 0.5), label = "No data available", size = 5) +
+      #     theme_void() + ggtitle(config$title)
+      # } else {
+      #   plot <- ggplot(df, aes_mapping) + theme_publication(legend_position = config$legend_position)
+      # }
+
       plot <- ggplot(df, aes_mapping) + theme_publication(legend_position = config$legend_position)
 
       # Add appropriate plot layer or helper function based on plot type
@@ -665,7 +657,7 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
               ymax = !!custom_ymax_expr
             ),
             color = config$error_bar_params$color,
-            linewidth = 0.1
+            linewidth = ifelse(is.null(config$error_bar_params$linewidth), 0.1, config$error_bar_params$linewidth)
           )
 
         } else {
@@ -688,7 +680,7 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
                 ymax = .data[[y_mean_col]] + .data[[y_sd_col]]
               ),
               color = config$error_bar_params$color,
-              linewidth = 0.1
+              linewidth = ifelse(is.null(config$error_bar_params$linewidth), 0.1, config$error_bar_params$linewidth)
             )
           }
         }
@@ -793,7 +785,8 @@ generate_scatter_plot <- function(plot, config) {
         geom_abline(
           intercept = config$lm_line$intercept,
           slope = config$lm_line$slope,
-          color = smooth_color
+          color = smooth_color,
+          linewidth = 1
         )
     }
   }
@@ -1072,7 +1065,8 @@ generate_interaction_plot_configs <- function(df_summary, df_interaction, type)
             ymin = paste0("0 - 2 * WT_sd_", var),
             ymax = paste0("0 + 2 * WT_sd_", var)
           ),
-          color = "gray"
+          color = "gray70",
+          linewidth = 0.5
         ),
         x_breaks = unique(group_data$conc_num_factor_factor),
         x_labels = as.character(unique(group_data$conc_num)),
@@ -1564,7 +1558,8 @@ main <- function() {
         )$df_with_stats
       
       message("Calculating reference strain interaction scores")
-      reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "num", "Drug"))
+      reference_results <- calculate_interaction_scores(df_reference_interaction_stats,
+        df_bg_stats, group_vars = c("OrfRep", "Gene", "num", "Drug"))
       df_reference_calculations <- reference_results$calculations
       df_reference_interactions <- reference_results$interactions
       df_reference_interactions_joined <- reference_results$full_data