|
@@ -721,140 +721,132 @@ generate_interaction_plot_configs <- function(df, variables) {
|
|
}
|
|
}
|
|
|
|
|
|
generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE) {
|
|
generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE) {
|
|
- # Define SD bands
|
|
|
|
- sd_bands <- c(1, 2, 3)
|
|
|
|
-
|
|
|
|
- # Initialize list to store plot configurations
|
|
|
|
- configs <- list()
|
|
|
|
-
|
|
|
|
- # SD-based plots for L and K
|
|
|
|
- for (variable in c("L", "K")) {
|
|
|
|
- for (sd_band in sd_bands) {
|
|
|
|
- # Determine columns based on whether it's lm or not
|
|
|
|
- if (is_lm) {
|
|
|
|
- rank_var <- paste0(variable, "_Rank_lm")
|
|
|
|
- zscore_var <- paste0("Z_lm_", variable)
|
|
|
|
- y_label <- paste("Int Z score", variable)
|
|
|
|
- } else {
|
|
|
|
- rank_var <- paste0(variable, "_Rank")
|
|
|
|
- zscore_var <- paste0("Avg_Zscore_", variable)
|
|
|
|
- y_label <- paste("Avg Z score", variable)
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- # Calculate counts for annotations
|
|
|
|
- num_enhancers <- sum(df_filtered[[zscore_var]] >= sd_band, na.rm = TRUE)
|
|
|
|
- num_suppressors <- sum(df_filtered[[zscore_var]] <= -sd_band, na.rm = TRUE)
|
|
|
|
-
|
|
|
|
- # Annotated Plot Configuration
|
|
|
|
- configs[[length(configs) + 1]] <- list(
|
|
|
|
- df = df_filtered,
|
|
|
|
- x_var = rank_var,
|
|
|
|
- y_var = zscore_var,
|
|
|
|
- plot_type = "scatter",
|
|
|
|
- title = paste(y_label, "vs. Rank for", variable, "above", sd_band, "SD"),
|
|
|
|
- sd_band = sd_band,
|
|
|
|
- annotations = list(
|
|
|
|
- list(
|
|
|
|
- x = median(df_filtered[[rank_var]], na.rm = TRUE),
|
|
|
|
- y = 10,
|
|
|
|
- label = paste("Deletion Enhancers =", num_enhancers)
|
|
|
|
- ),
|
|
|
|
- list(
|
|
|
|
- x = median(df_filtered[[rank_var]], na.rm = TRUE),
|
|
|
|
- y = -10,
|
|
|
|
- label = paste("Deletion Suppressors =", num_suppressors)
|
|
|
|
- )
|
|
|
|
- ),
|
|
|
|
- sd_band_values = sd_band,
|
|
|
|
- shape = 3,
|
|
|
|
- size = 0.1,
|
|
|
|
- y_label = y_label,
|
|
|
|
- x_label = "Rank",
|
|
|
|
- legend_position = "none"
|
|
|
|
- )
|
|
|
|
-
|
|
|
|
- # Non-Annotated Plot Configuration
|
|
|
|
- configs[[length(configs) + 1]] <- list(
|
|
|
|
- df = df_filtered,
|
|
|
|
- x_var = rank_var,
|
|
|
|
- y_var = zscore_var,
|
|
|
|
- plot_type = "scatter",
|
|
|
|
- title = paste(y_label, "vs. Rank for", variable, "above", sd_band, "SD No Annotations"),
|
|
|
|
- sd_band = sd_band,
|
|
|
|
- annotations = NULL,
|
|
|
|
- sd_band_values = sd_band,
|
|
|
|
- shape = 3,
|
|
|
|
- size = 0.1,
|
|
|
|
- y_label = y_label,
|
|
|
|
- x_label = "Rank",
|
|
|
|
- legend_position = "none"
|
|
|
|
- )
|
|
|
|
- }
|
|
|
|
|
|
+
|
|
|
|
+ sd_bands <- c(1, 2, 3)
|
|
|
|
+
|
|
|
|
+ configs <- list()
|
|
|
|
+
|
|
|
|
+ # SD-based plots for L and K
|
|
|
|
+ for (variable in c("L", "K")) {
|
|
|
|
+ for (sd_band in sd_bands) {
|
|
|
|
+ # Determine columns based on whether it's lm or not
|
|
|
|
+ if (is_lm) {
|
|
|
|
+ rank_var <- paste0(variable, "_Rank_lm")
|
|
|
|
+ zscore_var <- paste0("Z_lm_", variable)
|
|
|
|
+ y_label <- paste("Int Z score", variable)
|
|
|
|
+ } else {
|
|
|
|
+ rank_var <- paste0(variable, "_Rank")
|
|
|
|
+ zscore_var <- paste0("Avg_Zscore_", variable)
|
|
|
|
+ y_label <- paste("Avg Z score", variable)
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ num_enhancers <- sum(df_filtered[[zscore_var]] >= sd_band, na.rm = TRUE)
|
|
|
|
+ num_suppressors <- sum(df_filtered[[zscore_var]] <= -sd_band, na.rm = TRUE)
|
|
|
|
+
|
|
|
|
+ # Annotated plot configuration
|
|
|
|
+ configs[[length(configs) + 1]] <- list(
|
|
|
|
+ df = df_filtered,
|
|
|
|
+ x_var = rank_var,
|
|
|
|
+ y_var = zscore_var,
|
|
|
|
+ plot_type = "scatter",
|
|
|
|
+ title = paste(y_label, "vs. Rank for", variable, "above", sd_band, "SD"),
|
|
|
|
+ sd_band = sd_band,
|
|
|
|
+ annotations = list(
|
|
|
|
+ list(
|
|
|
|
+ x = median(df_filtered[[rank_var]], na.rm = TRUE),
|
|
|
|
+ y = 10,
|
|
|
|
+ label = paste("Deletion Enhancers =", num_enhancers)
|
|
|
|
+ ),
|
|
|
|
+ list(
|
|
|
|
+ x = median(df_filtered[[rank_var]], na.rm = TRUE),
|
|
|
|
+ y = -10,
|
|
|
|
+ label = paste("Deletion Suppressors =", num_suppressors)
|
|
|
|
+ )
|
|
|
|
+ ),
|
|
|
|
+ sd_band_values = sd_band,
|
|
|
|
+ shape = 3,
|
|
|
|
+ size = 0.1,
|
|
|
|
+ y_label = y_label,
|
|
|
|
+ x_label = "Rank",
|
|
|
|
+ legend_position = "none"
|
|
|
|
+ )
|
|
|
|
+
|
|
|
|
+ # Non-Annotated Plot Configuration
|
|
|
|
+ configs[[length(configs) + 1]] <- list(
|
|
|
|
+ df = df_filtered,
|
|
|
|
+ x_var = rank_var,
|
|
|
|
+ y_var = zscore_var,
|
|
|
|
+ plot_type = "scatter",
|
|
|
|
+ title = paste(y_label, "vs. Rank for", variable, "above", sd_band, "SD No Annotations"),
|
|
|
|
+ sd_band = sd_band,
|
|
|
|
+ annotations = NULL,
|
|
|
|
+ sd_band_values = sd_band,
|
|
|
|
+ shape = 3,
|
|
|
|
+ size = 0.1,
|
|
|
|
+ y_label = y_label,
|
|
|
|
+ x_label = "Rank",
|
|
|
|
+ legend_position = "none"
|
|
|
|
+ )
|
|
}
|
|
}
|
|
|
|
+ }
|
|
|
|
|
|
- # Avg ZScore and Rank Avg ZScore Plots for r, L, K, and AUC
|
|
|
|
- for (variable in variables) {
|
|
|
|
- for (plot_type in c("Avg_Zscore_vs_lm", "Rank_Avg_Zscore_vs_lm")) {
|
|
|
|
- # Define x and y variables based on plot type
|
|
|
|
- if (plot_type == "Avg_Zscore_vs_lm") {
|
|
|
|
- x_var <- paste0("Avg_Zscore_", variable)
|
|
|
|
- y_var <- paste0("Z_lm_", variable)
|
|
|
|
- title_suffix <- paste("Avg Zscore vs lm", variable)
|
|
|
|
-
|
|
|
|
- # Add rectangles configuration for Avg_Zscore_vs_lm
|
|
|
|
- rectangles <- list(
|
|
|
|
- list(
|
|
|
|
- xmin = -2, xmax = 2, ymin = -2, ymax = 2,
|
|
|
|
- fill = NA, color = "grey20", alpha = 0.1
|
|
|
|
- )
|
|
|
|
- )
|
|
|
|
- } else {
|
|
|
|
- x_var <- paste0(variable, "_Rank")
|
|
|
|
- y_var <- paste0(variable, "_Rank_lm")
|
|
|
|
- title_suffix <- paste("Rank Avg Zscore vs lm", variable)
|
|
|
|
- rectangles <- NULL
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- # Fit linear model
|
|
|
|
- lm_fit <- lm(df_filtered[[y_var]] ~ df_filtered[[x_var]], data = df_filtered)
|
|
|
|
-
|
|
|
|
- # Check for perfect fit
|
|
|
|
- if (summary(lm_fit)$sigma == 0) {
|
|
|
|
- next # Skip this iteration if the fit is perfect
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- # Calculate R-squared
|
|
|
|
- r_squared <- summary(lm_fit)$r.squared
|
|
|
|
-
|
|
|
|
- # Plot Configuration
|
|
|
|
- configs[[length(configs) + 1]] <- list(
|
|
|
|
- df = df_filtered,
|
|
|
|
- x_var = x_var,
|
|
|
|
- y_var = y_var,
|
|
|
|
- plot_type = "scatter",
|
|
|
|
- title = title_suffix,
|
|
|
|
- annotations = list(
|
|
|
|
- list(
|
|
|
|
- x = 0,
|
|
|
|
- y = 0,
|
|
|
|
- label = paste("R-squared =", round(r_squared, 2))
|
|
|
|
- )
|
|
|
|
- ),
|
|
|
|
- sd_band_values = NULL, # Not applicable
|
|
|
|
- shape = 3,
|
|
|
|
- size = 0.1,
|
|
|
|
- add_smooth = TRUE,
|
|
|
|
- lm_line = list(intercept = coef(lm_fit)[1], slope = coef(lm_fit)[2]),
|
|
|
|
- legend_position = "right",
|
|
|
|
- color_var = "Overlap",
|
|
|
|
- x_label = x_var,
|
|
|
|
- y_label = y_var,
|
|
|
|
- rectangles = rectangles # Add rectangles configuration
|
|
|
|
- )
|
|
|
|
- }
|
|
|
|
|
|
+ # Avg ZScore and Rank Avg ZScore Plots for r, L, K, and AUC
|
|
|
|
+ for (variable in variables) {
|
|
|
|
+ for (plot_type in c("Avg_Zscore_vs_lm", "Rank_Avg_Zscore_vs_lm")) {
|
|
|
|
+ # Define specific variables based on plot type
|
|
|
|
+ if (plot_type == "Avg_Zscore_vs_lm") {
|
|
|
|
+ x_var <- paste0("Avg_Zscore_", variable)
|
|
|
|
+ y_var <- paste0("Z_lm_", variable)
|
|
|
|
+ title_suffix <- paste("Avg Zscore vs lm", variable)
|
|
|
|
+ rectangles <- list(
|
|
|
|
+ list(xmin = -2, xmax = 2, ymin = -2, ymax = 2,
|
|
|
|
+ fill = NA, color = "grey20", alpha = 0.1
|
|
|
|
+ )
|
|
|
|
+ )
|
|
|
|
+ } else {
|
|
|
|
+ x_var <- paste0(variable, "_Rank")
|
|
|
|
+ y_var <- paste0(variable, "_Rank_lm")
|
|
|
|
+ title_suffix <- paste("Rank Avg Zscore vs lm", variable)
|
|
|
|
+ rectangles <- NULL
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ # Fit linear model
|
|
|
|
+ lm_fit <- lm(df_filtered[[y_var]] ~ df_filtered[[x_var]], data = df_filtered)
|
|
|
|
+
|
|
|
|
+ # Check for perfect fit
|
|
|
|
+ if (summary(lm_fit)$sigma == 0) {
|
|
|
|
+ next # Skip this iteration if the fit is perfect
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ r_squared <- summary(lm_fit)$r.squared
|
|
|
|
+
|
|
|
|
+ configs[[length(configs) + 1]] <- list(
|
|
|
|
+ df = df_filtered,
|
|
|
|
+ x_var = x_var,
|
|
|
|
+ y_var = y_var,
|
|
|
|
+ plot_type = "scatter",
|
|
|
|
+ title = title_suffix,
|
|
|
|
+ annotations = list(
|
|
|
|
+ list(
|
|
|
|
+ x = 0,
|
|
|
|
+ y = 0,
|
|
|
|
+ label = paste("R-squared =", round(r_squared, 2))
|
|
|
|
+ )
|
|
|
|
+ ),
|
|
|
|
+ sd_band_values = NULL, # Not applicable
|
|
|
|
+ shape = 3,
|
|
|
|
+ size = 0.1,
|
|
|
|
+ add_smooth = TRUE,
|
|
|
|
+ lm_line = list(intercept = coef(lm_fit)[1], slope = coef(lm_fit)[2]),
|
|
|
|
+ legend_position = "right",
|
|
|
|
+ color_var = "Overlap",
|
|
|
|
+ x_label = x_var,
|
|
|
|
+ y_label = y_var,
|
|
|
|
+ rectangles = rectangles # Add rectangles configuration
|
|
|
|
+ )
|
|
}
|
|
}
|
|
-
|
|
|
|
- return(configs)
|
|
|
|
|
|
+ }
|
|
|
|
+ return(configs)
|
|
}
|
|
}
|
|
|
|
|
|
generate_correlation_plot_configs <- function(df) {
|
|
generate_correlation_plot_configs <- function(df) {
|
|
@@ -1247,16 +1239,16 @@ main <- function() {
|
|
)
|
|
)
|
|
)
|
|
)
|
|
|
|
|
|
- message("Generating quality control plots")
|
|
|
|
- generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots)
|
|
|
|
- generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots)
|
|
|
|
- generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots)
|
|
|
|
- generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plots)
|
|
|
|
- generate_and_save_plots(out_dir_qc, "plate_analysis_boxplots", plate_analysis_boxplots)
|
|
|
|
- generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plots)
|
|
|
|
- generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros_boxplots", plate_analysis_no_zeros_boxplots)
|
|
|
|
- generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots)
|
|
|
|
- generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots)
|
|
|
|
|
|
+ # message("Generating quality control plots")
|
|
|
|
+ # generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots)
|
|
|
|
+ # generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots)
|
|
|
|
+ # generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots)
|
|
|
|
+ # generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plots)
|
|
|
|
+ # generate_and_save_plots(out_dir_qc, "plate_analysis_boxplots", plate_analysis_boxplots)
|
|
|
|
+ # generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plots)
|
|
|
|
+ # generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros_boxplots", plate_analysis_no_zeros_boxplots)
|
|
|
|
+ # generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots)
|
|
|
|
+ # generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots)
|
|
|
|
|
|
# Process background strains
|
|
# Process background strains
|
|
bg_strains <- c("YDL227C")
|
|
bg_strains <- c("YDL227C")
|