Browse Source

Reformat rank plot configs

Bryan Roessler 7 tháng trước
mục cha
commit
2ddba41950
1 tập tin đã thay đổi với 133 bổ sung và 141 xóa
  1. 133 141
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R

+ 133 - 141
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -721,140 +721,132 @@ generate_interaction_plot_configs <- function(df, variables) {
 }
 
 generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE) {
-    # Define SD bands
-    sd_bands <- c(1, 2, 3)
-    
-    # Initialize list to store plot configurations
-    configs <- list()
-    
-    # SD-based plots for L and K
-    for (variable in c("L", "K")) {
-        for (sd_band in sd_bands) {
-            # Determine columns based on whether it's lm or not
-            if (is_lm) {
-                rank_var <- paste0(variable, "_Rank_lm")
-                zscore_var <- paste0("Z_lm_", variable)
-                y_label <- paste("Int Z score", variable)
-            } else {
-                rank_var <- paste0(variable, "_Rank")
-                zscore_var <- paste0("Avg_Zscore_", variable)
-                y_label <- paste("Avg Z score", variable)
-            }
-            
-            # Calculate counts for annotations
-            num_enhancers <- sum(df_filtered[[zscore_var]] >= sd_band, na.rm = TRUE)
-            num_suppressors <- sum(df_filtered[[zscore_var]] <= -sd_band, na.rm = TRUE)
-            
-            # Annotated Plot Configuration
-            configs[[length(configs) + 1]] <- list(
-                df = df_filtered,
-                x_var = rank_var,
-                y_var = zscore_var,
-                plot_type = "scatter",
-                title = paste(y_label, "vs. Rank for", variable, "above", sd_band, "SD"),
-                sd_band = sd_band,
-                annotations = list(
-                    list(
-                        x = median(df_filtered[[rank_var]], na.rm = TRUE),
-                        y = 10,
-                        label = paste("Deletion Enhancers =", num_enhancers)
-                    ),
-                    list(
-                        x = median(df_filtered[[rank_var]], na.rm = TRUE),
-                        y = -10,
-                        label = paste("Deletion Suppressors =", num_suppressors)
-                    )
-                ),
-                sd_band_values = sd_band,
-                shape = 3,
-                size = 0.1,
-                y_label = y_label,
-                x_label = "Rank",
-                legend_position = "none"
-            )
-            
-            # Non-Annotated Plot Configuration
-            configs[[length(configs) + 1]] <- list(
-                df = df_filtered,
-                x_var = rank_var,
-                y_var = zscore_var,
-                plot_type = "scatter",
-                title = paste(y_label, "vs. Rank for", variable, "above", sd_band, "SD No Annotations"),
-                sd_band = sd_band,
-                annotations = NULL,
-                sd_band_values = sd_band,
-                shape = 3,
-                size = 0.1,
-                y_label = y_label,
-                x_label = "Rank",
-                legend_position = "none"
-            )
-        }
+
+  sd_bands <- c(1, 2, 3)
+  
+  configs <- list()
+  
+  # SD-based plots for L and K
+  for (variable in c("L", "K")) {
+    for (sd_band in sd_bands) {
+      # Determine columns based on whether it's lm or not
+      if (is_lm) {
+        rank_var <- paste0(variable, "_Rank_lm")
+        zscore_var <- paste0("Z_lm_", variable)
+        y_label <- paste("Int Z score", variable)
+      } else {
+        rank_var <- paste0(variable, "_Rank")
+        zscore_var <- paste0("Avg_Zscore_", variable)
+        y_label <- paste("Avg Z score", variable)
+      }
+      
+      num_enhancers <- sum(df_filtered[[zscore_var]] >= sd_band, na.rm = TRUE)
+      num_suppressors <- sum(df_filtered[[zscore_var]] <= -sd_band, na.rm = TRUE)
+      
+      # Annotated plot configuration
+      configs[[length(configs) + 1]] <- list(
+        df = df_filtered,
+        x_var = rank_var,
+        y_var = zscore_var,
+        plot_type = "scatter",
+        title = paste(y_label, "vs. Rank for", variable, "above", sd_band, "SD"),
+        sd_band = sd_band,
+        annotations = list(
+          list(
+            x = median(df_filtered[[rank_var]], na.rm = TRUE),
+            y = 10,
+            label = paste("Deletion Enhancers =", num_enhancers)
+          ),
+          list(
+            x = median(df_filtered[[rank_var]], na.rm = TRUE),
+            y = -10,
+            label = paste("Deletion Suppressors =", num_suppressors)
+          )
+        ),
+        sd_band_values = sd_band,
+        shape = 3,
+        size = 0.1,
+        y_label = y_label,
+        x_label = "Rank",
+        legend_position = "none"
+      )
+      
+      # Non-Annotated Plot Configuration
+      configs[[length(configs) + 1]] <- list(
+        df = df_filtered,
+        x_var = rank_var,
+        y_var = zscore_var,
+        plot_type = "scatter",
+        title = paste(y_label, "vs. Rank for", variable, "above", sd_band, "SD No Annotations"),
+        sd_band = sd_band,
+        annotations = NULL,
+        sd_band_values = sd_band,
+        shape = 3,
+        size = 0.1,
+        y_label = y_label,
+        x_label = "Rank",
+        legend_position = "none"
+      )
     }
+  }
     
-    # Avg ZScore and Rank Avg ZScore Plots for r, L, K, and AUC
-    for (variable in variables) {
-        for (plot_type in c("Avg_Zscore_vs_lm", "Rank_Avg_Zscore_vs_lm")) {
-            # Define x and y variables based on plot type
-            if (plot_type == "Avg_Zscore_vs_lm") {
-                x_var <- paste0("Avg_Zscore_", variable)
-                y_var <- paste0("Z_lm_", variable)
-                title_suffix <- paste("Avg Zscore vs lm", variable)
-                
-                # Add rectangles configuration for Avg_Zscore_vs_lm
-                rectangles <- list(
-                    list(
-                        xmin = -2, xmax = 2, ymin = -2, ymax = 2,
-                        fill = NA, color = "grey20", alpha = 0.1
-                    )
-                )
-            } else {
-                x_var <- paste0(variable, "_Rank")
-                y_var <- paste0(variable, "_Rank_lm")
-                title_suffix <- paste("Rank Avg Zscore vs lm", variable)
-                rectangles <- NULL
-            }
-            
-            # Fit linear model
-            lm_fit <- lm(df_filtered[[y_var]] ~ df_filtered[[x_var]], data = df_filtered)
-            
-            # Check for perfect fit
-            if (summary(lm_fit)$sigma == 0) {
-                next  # Skip this iteration if the fit is perfect
-            }
-            
-            # Calculate R-squared
-            r_squared <- summary(lm_fit)$r.squared
-            
-            # Plot Configuration
-            configs[[length(configs) + 1]] <- list(
-                df = df_filtered,
-                x_var = x_var,
-                y_var = y_var,
-                plot_type = "scatter",
-                title = title_suffix,
-                annotations = list(
-                    list(
-                        x = 0,
-                        y = 0,
-                        label = paste("R-squared =", round(r_squared, 2))
-                    )
-                ),
-                sd_band_values = NULL,  # Not applicable
-                shape = 3,
-                size = 0.1,
-                add_smooth = TRUE,
-                lm_line = list(intercept = coef(lm_fit)[1], slope = coef(lm_fit)[2]),
-                legend_position = "right",
-                color_var = "Overlap",
-                x_label = x_var,
-                y_label = y_var,
-                rectangles = rectangles  # Add rectangles configuration
-            )
-        }
+  # Avg ZScore and Rank Avg ZScore Plots for r, L, K, and AUC
+  for (variable in variables) {
+    for (plot_type in c("Avg_Zscore_vs_lm", "Rank_Avg_Zscore_vs_lm")) {
+      # Define specific variables based on plot type
+      if (plot_type == "Avg_Zscore_vs_lm") {
+        x_var <- paste0("Avg_Zscore_", variable)
+        y_var <- paste0("Z_lm_", variable)
+        title_suffix <- paste("Avg Zscore vs lm", variable)
+        rectangles <- list(
+          list(xmin = -2, xmax = 2, ymin = -2, ymax = 2,
+            fill = NA, color = "grey20", alpha = 0.1
+          )
+        )
+      } else {
+        x_var <- paste0(variable, "_Rank")
+        y_var <- paste0(variable, "_Rank_lm")
+        title_suffix <- paste("Rank Avg Zscore vs lm", variable)
+        rectangles <- NULL
+      }
+      
+      # Fit linear model
+      lm_fit <- lm(df_filtered[[y_var]] ~ df_filtered[[x_var]], data = df_filtered)
+      
+      # Check for perfect fit
+      if (summary(lm_fit)$sigma == 0) {
+        next  # Skip this iteration if the fit is perfect
+      }
+      
+      r_squared <- summary(lm_fit)$r.squared
+      
+      configs[[length(configs) + 1]] <- list(
+        df = df_filtered,
+        x_var = x_var,
+        y_var = y_var,
+        plot_type = "scatter",
+        title = title_suffix,
+        annotations = list(
+          list(
+            x = 0,
+            y = 0,
+            label = paste("R-squared =", round(r_squared, 2))
+          )
+        ),
+        sd_band_values = NULL,  # Not applicable
+        shape = 3,
+        size = 0.1,
+        add_smooth = TRUE,
+        lm_line = list(intercept = coef(lm_fit)[1], slope = coef(lm_fit)[2]),
+        legend_position = "right",
+        color_var = "Overlap",
+        x_label = x_var,
+        y_label = y_var,
+        rectangles = rectangles  # Add rectangles configuration
+      )
     }
-    
-    return(configs)
+  }
+  return(configs)
 }
 
 generate_correlation_plot_configs <- function(df) {
@@ -1247,16 +1239,16 @@ main <- function() {
       )
     )
 
-    message("Generating quality control plots")
-    generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots)
-    generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots)
-    generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots)
-    generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plots)
-    generate_and_save_plots(out_dir_qc, "plate_analysis_boxplots", plate_analysis_boxplots)
-    generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plots)
-    generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros_boxplots", plate_analysis_no_zeros_boxplots)
-    generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots)
-    generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots)
+    # message("Generating quality control plots")
+    # generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots)
+    # generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots)
+    # generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots)
+    # generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plots)
+    # generate_and_save_plots(out_dir_qc, "plate_analysis_boxplots", plate_analysis_boxplots)
+    # generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plots)
+    # generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros_boxplots", plate_analysis_no_zeros_boxplots)
+    # generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots)
+    # generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots)
 
     # Process background strains
     bg_strains <- c("YDL227C")